From 60658a2d2f8b90d41529dd558509aac1d50da79a Mon Sep 17 00:00:00 2001 From: Vincent Danjean Date: Sun, 5 Oct 2014 20:15:18 +0100 Subject: [PATCH] Import pocl_0.10.orig.tar.gz [dgit import orig pocl_0.10.orig.tar.gz] --- CHANGES | 324 + COPYING | 19 + INSTALL | 429 + Makefile.am | 84 + Makefile.in | 1022 + README | 12 + README.ARM | 15 + README.Cell | 94 + README.OSX | 80 + README.packaging | 31 + README.powerpc | 34 + TODO | 88 + acinclude.m4 | 271 + aclocal.m4 | 1256 + config.h.in | 243 + config/ar-lib | 270 + config/compile | 142 + config/config.guess | 1530 + config/config.sub | 1782 + config/depcomp | 708 + config/install-sh | 527 + config/ltmain.sh | 9661 ++++ config/missing | 331 + config/xclang | 68 + configure | 22972 +++++++++ configure.ac | 1349 + depcomp | 632 + doc/build-envs.txt | 88 + doc/sphinx/Makefile | 93 + doc/sphinx/source/conf.py | 194 + doc/sphinx/source/design.rst | 12 + doc/sphinx/source/development.rst | 204 + doc/sphinx/source/env_variables.rst | 137 + doc/sphinx/source/faq.rst | 56 + doc/sphinx/source/features.rst | 16 + doc/sphinx/source/host_library.rst | 11 + doc/sphinx/source/index.rst | 29 + doc/sphinx/source/kernel_compiler.rst | 308 + doc/sphinx/source/memory_management.rst | 61 + doc/sphinx/source/releasing.rst | 66 + doc/sphinx/source/using.rst | 104 + examples/AMD/AMDSDK.patch | 481 + examples/AMD/Makefile.am | 258 + examples/AMD/Makefile.in | 727 + examples/AMD/README | 11 + examples/AMDSDK2.9/AMDSDK2_9.patch | 186 + examples/AMDSDK2.9/Makefile.am | 341 + examples/AMDSDK2.9/Makefile.in | 810 + examples/AMDSDK2.9/README | 11 + examples/EinsteinToolkit/EinsteinToolkit.c | 1385 + examples/EinsteinToolkit/ML_BSSN_CL_RHS1.cl | 3477 ++ examples/EinsteinToolkit/ML_BSSN_CL_RHS2.cl | 3877 ++ examples/EinsteinToolkit/Makefile.am | 31 + examples/EinsteinToolkit/Makefile.in | 646 + examples/Makefile.am | 79 + examples/Makefile.in | 736 + examples/Parboil/Makefile.am | 151 + examples/Parboil/Makefile.in | 618 + 
examples/Parboil/OpenCL_common.cpp.patch | 11 + examples/Parboil/README | 8 + examples/Parboil/benchmarks.patch | 67 + examples/Parboil/opencl.mk.patch | 13 + examples/Rodinia/Makefile.am | 127 + examples/Rodinia/Makefile.in | 595 + examples/Rodinia/README | 10 + examples/VexCL/Makefile.am | 39 + examples/VexCL/Makefile.in | 509 + examples/VexCL/README | 8 + examples/ViennaCL/Makefile.am | 91 + examples/ViennaCL/Makefile.in | 560 + examples/ViennaCL/README | 10 + examples/ViennaCL/bandwidth-reduction.stdout | 11 + .../blas3_solve_double-test-opencl.stdout | 2003 + .../blas3_solve_float-test-opencl.stdout | 2003 + examples/ViennaCL/custom-context.stdout | 13 + examples/ViennaCL/custom-kernels.stdout | 7 + .../ViennaCL/external_linkage-opencl.stdout | 3 + examples/ViennaCL/fft.stdout | 8 + .../global_variables-test-opencl.stdout | 10 + .../ViennaCL/iterators-test-opencl.stdout | 37 + .../matrix_col_double-test-opencl.stdout | 4386 ++ .../matrix_col_float-test-opencl.stdout | 4387 ++ .../matrix_col_int-test-opencl.stdout | 4158 ++ .../matrix_row_double-test-opencl.stdout | 4386 ++ .../matrix_row_float-test-opencl.stdout | 4387 ++ .../matrix_row_int-test-opencl.stdout | 4158 ++ .../ViennaCL/matrix_vector-test-opencl.stdout | 1821 + .../matrix_vector_int-test-opencl.stdout | 1017 + examples/ViennaCL/nmf-test-opencl.stdout | 217 + examples/ViennaCL/scalar-test-opencl.stdout | 25 + examples/ViennaCL/scheduler.stdout | 5 + .../structured-matrices-test-opencl.stdout | 50 + .../ViennaCL/vector_double-test-opencl.stdout | 905 + examples/example1-spir32/Makefile.am | 33 + examples/example1-spir32/Makefile.in | 662 + examples/example1-spir32/example1.c | 102 + examples/example1-spir32/example1.cl | 30 + examples/example1-spir32/example1.spir | Bin 0 -> 1580 bytes examples/example1-spir32/example1_exec.c | 186 + examples/example1-spir64/Makefile.am | 33 + examples/example1-spir64/Makefile.in | 661 + examples/example1-spir64/example1.c | 102 + examples/example1-spir64/example1.cl 
| 30 + examples/example1-spir64/example1.spir | Bin 0 -> 1604 bytes examples/example1-spir64/example1_exec.c | 186 + examples/example1/Makefile.am | 31 + examples/example1/Makefile.in | 660 + examples/example1/example1.c | 100 + examples/example1/example1.cl | 41 + examples/example1/example1_exec.c | 184 + examples/example2/Makefile.am | 31 + examples/example2/Makefile.in | 644 + examples/example2/example2.c | 225 + examples/example2/example2.cl | 163 + examples/example2a/Makefile.am | 31 + examples/example2a/Makefile.in | 644 + examples/example2a/example2a.c | 225 + examples/example2a/example2a.cl | 170 + examples/opencl-book-samples/Makefile.am | 117 + examples/opencl-book-samples/Makefile.in | 591 + examples/piglit/Makefile.am | 46 + examples/piglit/Makefile.in | 516 + examples/piglit/README | 37 + examples/scalarwave/Makefile.am | 31 + examples/scalarwave/Makefile.in | 644 + examples/scalarwave/scalarwave.c | 231 + examples/scalarwave/scalarwave.cl | 96 + examples/standalone/Makefile.am | 33 + examples/standalone/Makefile.in | 509 + examples/standalone/standalone.cl | 19 + examples/trig/Makefile.am | 31 + examples/trig/Makefile.in | 659 + examples/trig/trig.c | 105 + examples/trig/trig.cl | 13 + examples/trig/trig_exec.c | 162 + fix-include/CL/cl.h | 45 + fix-include/CL/cl.hpp | 51 + fix-include/CL/cl_gl.h | 46 + fix-include/CL/cl_platform.h | 44 + fix-include/OpenCL/cl.h | 1 + fix-include/OpenCL/cl.hpp | 1 + fix-include/OpenCL/cl_gl.h | 1 + fix-include/OpenCL/cl_platform.h | 1 + include/CL/Makefile.am | 43 + include/CL/Makefile.in | 652 + include/CL/cl.h | 1214 + include/CL/cl.hpp.in | 12452 +++++ include/CL/cl_ext.h | 251 + include/CL/cl_gl.h | 162 + include/CL/cl_gl_ext.h | 69 + include/CL/cl_platform.h | 1254 + include/CL/opencl.h | 54 + include/Makefile.am | 32 + include/Makefile.in | 783 + include/OpenCL/Makefile.am | 30 + include/OpenCL/Makefile.in | 609 + include/OpenCL/cl.h | 1 + include/OpenCL/cl.hpp | 1 + include/OpenCL/cl_ext.h | 1 + 
include/OpenCL/cl_gl.h | 1 + include/OpenCL/cl_gl_ext.h | 1 + include/OpenCL/cl_platform.h | 1 + include/OpenCL/opencl.h | 1 + include/_kernel.h | 2172 + include/_kernel_c.h | 301 + include/pocl.h | 193 + include/pocl_device.h | 94 + include/pocl_features.h | 35 + include/pocl_tests.h | 19 + include/pocl_types.h | 75 + include/poclu.h | 103 + include/utlist.h | 522 + lib/CL/Makefile.am | 180 + lib/CL/Makefile.in | 2602 + lib/CL/clBuildProgram.c | 320 + lib/CL/clCreateBuffer.c | 196 + lib/CL/clCreateCommandQueue.c | 88 + lib/CL/clCreateContext.c | 202 + lib/CL/clCreateContextFromType.c | 113 + lib/CL/clCreateFromGLTexture2D.c | 14 + lib/CL/clCreateFromGLTexture3D.c | 36 + lib/CL/clCreateImage.c | 183 + lib/CL/clCreateImage2D.c | 53 + lib/CL/clCreateImage3D.c | 53 + lib/CL/clCreateKernel.c | 128 + lib/CL/clCreateKernelsInProgram.c | 72 + lib/CL/clCreateProgramWithBinary.c | 170 + lib/CL/clCreateProgramWithSource.c | 132 + lib/CL/clCreateSampler.c | 54 + lib/CL/clCreateSubBuffer.c | 180 + lib/CL/clCreateUserEvent.c | 11 + lib/CL/clEnqueueBarrier.c | 47 + lib/CL/clEnqueueCopyBuffer.c | 95 + lib/CL/clEnqueueCopyBufferRect.c | 118 + lib/CL/clEnqueueCopyBufferToImage.c | 48 + lib/CL/clEnqueueCopyImage.c | 59 + lib/CL/clEnqueueCopyImageToBuffer.c | 20 + lib/CL/clEnqueueFillImage.c | 190 + lib/CL/clEnqueueMapBuffer.c | 169 + lib/CL/clEnqueueMapBuffer.h | 33 + lib/CL/clEnqueueMapImage.c | 182 + lib/CL/clEnqueueMarker.c | 50 + lib/CL/clEnqueueMarkerWithWaitList.c | 59 + lib/CL/clEnqueueNDRangeKernel.c | 330 + lib/CL/clEnqueueNativeKernel.c | 119 + lib/CL/clEnqueueReadBuffer.c | 96 + lib/CL/clEnqueueReadBufferRect.c | 112 + lib/CL/clEnqueueReadImage.c | 89 + lib/CL/clEnqueueTask.c | 28 + lib/CL/clEnqueueUnmapMemObject.c | 92 + lib/CL/clEnqueueWaitForEvents.c | 11 + lib/CL/clEnqueueWriteBuffer.c | 98 + lib/CL/clEnqueueWriteBufferRect.c | 115 + lib/CL/clEnqueueWriteImage.c | 70 + lib/CL/clFinish.c | 290 + lib/CL/clFlush.c | 37 + lib/CL/clGetCommandQueueInfo.c | 71 + 
lib/CL/clGetContextInfo.c | 80 + lib/CL/clGetDeviceIDs.c | 62 + lib/CL/clGetDeviceInfo.c | 278 + lib/CL/clGetEventInfo.c | 41 + lib/CL/clGetEventProfilingInfo.c | 68 + lib/CL/clGetExtensionFunctionAddress.c | 22 + lib/CL/clGetImageInfo.c | 56 + lib/CL/clGetKernelArgInfo.c | 93 + lib/CL/clGetKernelInfo.c | 78 + lib/CL/clGetKernelWorkGroupInfo.c | 105 + lib/CL/clGetMemObjectInfo.c | 76 + lib/CL/clGetPlatformIDs.c | 62 + lib/CL/clGetPlatformInfo.c | 108 + lib/CL/clGetProgramBuildInfo.c | 90 + lib/CL/clGetProgramInfo.c | 129 + lib/CL/clGetSamplerInfo.c | 15 + lib/CL/clGetSupportedImageFormats.c | 143 + lib/CL/clIcdGetPlatformIDsKHR.c | 20 + lib/CL/clReleaseCommandQueue.c | 45 + lib/CL/clReleaseContext.c | 52 + lib/CL/clReleaseDevice.c | 41 + lib/CL/clReleaseEvent.c | 44 + lib/CL/clReleaseKernel.c | 77 + lib/CL/clReleaseMemObject.c | 76 + lib/CL/clReleaseProgram.c | 74 + lib/CL/clReleaseSampler.c | 8 + lib/CL/clRetainCommandQueue.c | 32 + lib/CL/clRetainContext.c | 32 + lib/CL/clRetainDevice.c | 31 + lib/CL/clRetainEvent.c | 14 + lib/CL/clRetainKernel.c | 32 + lib/CL/clRetainMemObject.c | 32 + lib/CL/clRetainProgram.c | 32 + lib/CL/clRetainSampler.c | 10 + lib/CL/clSetEventCallback.c | 37 + lib/CL/clSetKernelArg.c | 107 + lib/CL/clSetMemObjectDestructorCallback.c | 11 + lib/CL/clSetUserEventStatus.c | 9 + lib/CL/clUnloadCompiler.c | 8 + lib/CL/clWaitForEvents.c | 38 + lib/CL/devices/Makefile.am | 52 + lib/CL/devices/Makefile.in | 829 + lib/CL/devices/basic/Makefile.am | 31 + lib/CL/devices/basic/Makefile.in | 640 + lib/CL/devices/basic/basic.c | 814 + lib/CL/devices/basic/basic.h | 48 + lib/CL/devices/bufalloc.c | 401 + lib/CL/devices/bufalloc.h | 162 + lib/CL/devices/cellspu/Makefile.am | 30 + lib/CL/devices/cellspu/Makefile.in | 638 + lib/CL/devices/cellspu/cellspu.c | 641 + lib/CL/devices/cellspu/cellspu.h | 61 + lib/CL/devices/common.c | 122 + lib/CL/devices/common.h | 83 + lib/CL/devices/cpuinfo.c | 257 + lib/CL/devices/cpuinfo.h | 35 + lib/CL/devices/dev_image.h | 
47 + lib/CL/devices/devices.c | 229 + lib/CL/devices/devices.h | 81 + lib/CL/devices/prototypes.inc | 92 + lib/CL/devices/pthread/Makefile.am | 31 + lib/CL/devices/pthread/Makefile.in | 638 + lib/CL/devices/pthread/pocl-pthread.h | 41 + lib/CL/devices/pthread/pthread.c | 767 + lib/CL/devices/tce/Makefile.am | 37 + lib/CL/devices/tce/Makefile.in | 866 + lib/CL/devices/tce/tce_common.cc | 742 + lib/CL/devices/tce/tce_common.h | 152 + lib/CL/devices/tce/tta_device_main.c | 209 + lib/CL/devices/tce/ttasim/Makefile.am | 34 + lib/CL/devices/tce/ttasim/Makefile.in | 658 + lib/CL/devices/tce/ttasim/ttasim.cc | 526 + lib/CL/devices/tce/ttasim/ttasim.h | 44 + lib/CL/devices/topology/Makefile.am | 7 + lib/CL/devices/topology/Makefile.in | 616 + lib/CL/devices/topology/pocl_topology.c | 60 + lib/CL/devices/topology/pocl_topology.h | 40 + lib/CL/pocl_cl.h | 603 + lib/CL/pocl_icd.h | 165 + lib/CL/pocl_image_util.c | 180 + lib/CL/pocl_image_util.h | 61 + lib/CL/pocl_intfn.h | 114 + lib/CL/pocl_llvm.h | 105 + lib/CL/pocl_llvm_api.cc | 1363 + lib/CL/pocl_mem_management.c | 100 + lib/CL/pocl_mem_management.h | 35 + lib/CL/pocl_runtime_config.c | 105 + lib/CL/pocl_runtime_config.h | 41 + lib/CL/pocl_util.c | 307 + lib/CL/pocl_util.h | 103 + lib/Makefile.am | 26 + lib/Makefile.in | 697 + lib/kernel/Makefile.am | 52 + lib/kernel/Makefile.in | 721 + lib/kernel/abs.cl | 35 + lib/kernel/abs_diff.cl | 46 + lib/kernel/acos.cl | 27 + lib/kernel/acosh.cl | 27 + lib/kernel/acospi.cl | 27 + lib/kernel/add_sat.cl | 55 + lib/kernel/all.cl | 144 + lib/kernel/any.cl | 144 + lib/kernel/as_type.cl | 239 + lib/kernel/asin.cl | 27 + lib/kernel/asinh.cl | 27 + lib/kernel/asinpi.cl | 27 + lib/kernel/async_work_group_copy.cl | 77 + lib/kernel/atan.cl | 27 + lib/kernel/atan2.cl | 27 + lib/kernel/atan2pi.cl | 27 + lib/kernel/atanh.cl | 27 + lib/kernel/atanpi.cl | 27 + lib/kernel/atomics.cl | 204 + lib/kernel/barrier.ll | 9 + lib/kernel/bitselect.cl | 34 + lib/kernel/cbrt.cl | 27 + lib/kernel/ceil.cl | 27 + 
lib/kernel/cellspu/CMakeLists.txt | 50 + lib/kernel/cellspu/Makefile | 696 + lib/kernel/cellspu/Makefile.am | 33 + lib/kernel/cellspu/Makefile.in | 696 + lib/kernel/clamp.cl | 28 + lib/kernel/clamp_int.cl | 28 + lib/kernel/clz.cl | 118 + lib/kernel/convert_type.cl | 41355 ++++++++++++++++ lib/kernel/copysign.cl | 27 + lib/kernel/cos.cl | 29 + lib/kernel/cosh.cl | 27 + lib/kernel/cospi.cl | 27 + lib/kernel/cross.cl | 49 + lib/kernel/degrees.cl | 28 + lib/kernel/distance.cl | 27 + lib/kernel/divide.cl | 28 + lib/kernel/dot.cl | 85 + lib/kernel/erf.cl | 27 + lib/kernel/erfc.cl | 27 + lib/kernel/exp.cl | 30 + lib/kernel/exp10.cl | 30 + lib/kernel/exp2.cl | 30 + lib/kernel/expm1.cl | 27 + lib/kernel/fabs.cl | 27 + lib/kernel/fast_distance.cl | 27 + lib/kernel/fast_length.cl | 29 + lib/kernel/fast_normalize.cl | 30 + lib/kernel/fdim.cl | 27 + lib/kernel/floor.cl | 27 + lib/kernel/fma.cl | 30 + lib/kernel/fmax.cl | 29 + lib/kernel/fmin.cl | 29 + lib/kernel/fmod.cl | 27 + lib/kernel/fract.cl | 32 + lib/kernel/get_global_id.c | 53 + lib/kernel/get_global_offset.c | 39 + lib/kernel/get_global_size.c | 43 + lib/kernel/get_group_id.c | 38 + lib/kernel/get_image_depth.cl | 48 + lib/kernel/get_image_height.cl | 51 + lib/kernel/get_image_width.cl | 49 + lib/kernel/get_local_id.c | 38 + lib/kernel/get_local_size.c | 39 + lib/kernel/get_num_groups.c | 39 + lib/kernel/get_work_dim.c | 31 + lib/kernel/hadd.cl | 28 + lib/kernel/host/Makefile.am | 36 + lib/kernel/host/Makefile.in | 972 + lib/kernel/hypot.cl | 28 + lib/kernel/ilogb.cl | 27 + lib/kernel/isequal.cl | 27 + lib/kernel/isfinite.cl | 29 + lib/kernel/isgreater.cl | 27 + lib/kernel/isgreaterequal.cl | 27 + lib/kernel/isinf.cl | 29 + lib/kernel/isless.cl | 27 + lib/kernel/islessequal.cl | 27 + lib/kernel/islessgreater.cl | 27 + lib/kernel/isnan.cl | 29 + lib/kernel/isnormal.cl | 29 + lib/kernel/isnotequal.cl | 27 + lib/kernel/isordered.cl | 27 + lib/kernel/isunordered.cl | 27 + lib/kernel/ldexp.cl | 28 + lib/kernel/length.cl | 
27 + lib/kernel/lgamma.cl | 27 + lib/kernel/log.cl | 30 + lib/kernel/log10.cl | 30 + lib/kernel/log1p.cl | 27 + lib/kernel/log2.cl | 29 + lib/kernel/logb.cl | 27 + lib/kernel/mad.cl | 27 + lib/kernel/mad24.cl | 27 + lib/kernel/mad_hi.cl | 31 + lib/kernel/mad_sat.cl | 120 + lib/kernel/max.cl | 29 + lib/kernel/max_i.cl | 28 + lib/kernel/maxmag.cl | 32 + lib/kernel/min.cl | 29 + lib/kernel/min_i.cl | 28 + lib/kernel/minmag.cl | 32 + lib/kernel/mix.cl | 28 + lib/kernel/mul24.cl | 27 + lib/kernel/mul_hi.cl | 101 + lib/kernel/nan.cl | 55 + lib/kernel/native_cos.cl | 26 + lib/kernel/native_log2.cl | 26 + lib/kernel/nextafter.cl | 27 + lib/kernel/normalize.cl | 27 + lib/kernel/pocl_image_rw_utils.h | 48 + lib/kernel/popcount.cl | 103 + lib/kernel/pow.cl | 27 + lib/kernel/pown.cl | 32 + lib/kernel/powr.cl | 30 + lib/kernel/printf.c | 458 + lib/kernel/printf_constant.c | 74 + lib/kernel/radians.cl | 28 + lib/kernel/read_image.cl | 167 + lib/kernel/recip.cl | 28 + lib/kernel/remainder.cl | 27 + lib/kernel/rhadd.cl | 28 + lib/kernel/rint.cl | 27 + lib/kernel/rootn.cl | 28 + lib/kernel/rotate.cl | 37 + lib/kernel/round.cl | 27 + lib/kernel/rsqrt.cl | 30 + lib/kernel/rules.mk | 89 + lib/kernel/select.cl | 194 + lib/kernel/shuffle.cl | 98 + lib/kernel/sign.cl | 31 + lib/kernel/signbit.cl | 75 + lib/kernel/sin.cl | 30 + lib/kernel/sincos.cl | 27 + lib/kernel/sinh.cl | 27 + lib/kernel/sinpi.cl | 27 + lib/kernel/smoothstep.cl | 32 + lib/kernel/sources-vml.mk | 277 + lib/kernel/sources.mk | 177 + lib/kernel/sqrt.cl | 153 + lib/kernel/step.cl | 39 + lib/kernel/sub_sat.cl | 54 + lib/kernel/tan.cl | 30 + lib/kernel/tanh.cl | 27 + lib/kernel/tanpi.cl | 27 + lib/kernel/tce/CMakeLists.txt | 66 + lib/kernel/tce/Makefile | 703 + lib/kernel/tce/Makefile.am | 45 + lib/kernel/tce/Makefile.in | 703 + lib/kernel/tce/get_image_width.cl.ll | 47 + lib/kernel/templates.h | 1765 + lib/kernel/tgamma.cl | 27 + lib/kernel/trunc.cl | 27 + lib/kernel/upsample.cl | 73 + lib/kernel/vecmathlib-pocl/acos.cc | 
547 + lib/kernel/vecmathlib-pocl/acosh.cc | 547 + lib/kernel/vecmathlib-pocl/acospi.cl | 576 + lib/kernel/vecmathlib-pocl/asin.cc | 547 + lib/kernel/vecmathlib-pocl/asinh.cc | 547 + lib/kernel/vecmathlib-pocl/asinpi.cl | 576 + lib/kernel/vecmathlib-pocl/atan.cc | 547 + lib/kernel/vecmathlib-pocl/atan2.cl | 702 + lib/kernel/vecmathlib-pocl/atan2pi.cl | 576 + lib/kernel/vecmathlib-pocl/atanh.cc | 547 + lib/kernel/vecmathlib-pocl/atanpi.cl | 576 + lib/kernel/vecmathlib-pocl/cbrt.cc | 547 + lib/kernel/vecmathlib-pocl/ceil.cc | 547 + lib/kernel/vecmathlib-pocl/clamp.cl | 1038 + lib/kernel/vecmathlib-pocl/copysign.cc | 598 + lib/kernel/vecmathlib-pocl/cos.cc | 547 + lib/kernel/vecmathlib-pocl/cosh.cc | 547 + lib/kernel/vecmathlib-pocl/cospi.cl | 576 + lib/kernel/vecmathlib-pocl/cross.cl | 66 + lib/kernel/vecmathlib-pocl/degrees.cl | 576 + lib/kernel/vecmathlib-pocl/distance.cl | 75 + lib/kernel/vecmathlib-pocl/dot.cl | 75 + lib/kernel/vecmathlib-pocl/exp.cc | 547 + lib/kernel/vecmathlib-pocl/exp10.cc | 547 + lib/kernel/vecmathlib-pocl/exp2.cc | 547 + lib/kernel/vecmathlib-pocl/expm1.cc | 547 + lib/kernel/vecmathlib-pocl/fabs.cc | 547 + lib/kernel/vecmathlib-pocl/fast_distance.cl | 23 + lib/kernel/vecmathlib-pocl/fast_length.cl | 26 + lib/kernel/vecmathlib-pocl/fast_normalize.cl | 26 + lib/kernel/vecmathlib-pocl/fdim.cc | 598 + lib/kernel/vecmathlib-pocl/floor.cc | 547 + lib/kernel/vecmathlib-pocl/fma.cc | 649 + lib/kernel/vecmathlib-pocl/fmax.cc | 598 + lib/kernel/vecmathlib-pocl/fmax.cl | 486 + lib/kernel/vecmathlib-pocl/fmin.cc | 598 + lib/kernel/vecmathlib-pocl/fmin.cl | 486 + lib/kernel/vecmathlib-pocl/fmod.cc | 598 + lib/kernel/vecmathlib-pocl/fract.cl | 2034 + lib/kernel/vecmathlib-pocl/frexp.cl | 1926 + lib/kernel/vecmathlib-pocl/generate-files.py | 785 + lib/kernel/vecmathlib-pocl/half_cos.cl | 208 + lib/kernel/vecmathlib-pocl/half_divide.cl | 208 + lib/kernel/vecmathlib-pocl/half_exp.cl | 208 + lib/kernel/vecmathlib-pocl/half_exp10.cl | 208 + 
lib/kernel/vecmathlib-pocl/half_exp2.cl | 208 + lib/kernel/vecmathlib-pocl/half_log.cl | 208 + lib/kernel/vecmathlib-pocl/half_log10.cl | 208 + lib/kernel/vecmathlib-pocl/half_log2.cl | 208 + lib/kernel/vecmathlib-pocl/half_powr.cl | 208 + lib/kernel/vecmathlib-pocl/half_recip.cl | 208 + lib/kernel/vecmathlib-pocl/half_rsqrt.cl | 208 + lib/kernel/vecmathlib-pocl/half_sin.cl | 208 + lib/kernel/vecmathlib-pocl/half_sqrt.cl | 208 + lib/kernel/vecmathlib-pocl/half_tan.cl | 208 + lib/kernel/vecmathlib-pocl/hypot.cc | 598 + lib/kernel/vecmathlib-pocl/ilogb.cl | 756 + lib/kernel/vecmathlib-pocl/ilogb_.cc | 547 + lib/kernel/vecmathlib-pocl/isequal.cl | 576 + lib/kernel/vecmathlib-pocl/isfinite.cc | 547 + lib/kernel/vecmathlib-pocl/isgreater.cl | 576 + lib/kernel/vecmathlib-pocl/isgreaterequal.cl | 576 + lib/kernel/vecmathlib-pocl/isinf.cc | 547 + lib/kernel/vecmathlib-pocl/isless.cl | 576 + lib/kernel/vecmathlib-pocl/islessequal.cl | 576 + lib/kernel/vecmathlib-pocl/islessgreater.cl | 576 + lib/kernel/vecmathlib-pocl/isnan.cc | 547 + lib/kernel/vecmathlib-pocl/isnormal.cc | 547 + lib/kernel/vecmathlib-pocl/isnotequal.cl | 576 + lib/kernel/vecmathlib-pocl/isordered.cl | 576 + lib/kernel/vecmathlib-pocl/isunordered.cl | 576 + .../vecmathlib-pocl/kernel-vecmathlib.h | 4059 ++ lib/kernel/vecmathlib-pocl/ldexp.cl | 1203 + lib/kernel/vecmathlib-pocl/ldexp_.cc | 1105 + lib/kernel/vecmathlib-pocl/length.cl | 75 + lib/kernel/vecmathlib-pocl/log.cc | 547 + lib/kernel/vecmathlib-pocl/log10.cc | 547 + lib/kernel/vecmathlib-pocl/log1p.cc | 547 + lib/kernel/vecmathlib-pocl/log2.cc | 547 + lib/kernel/vecmathlib-pocl/logb.cl | 576 + lib/kernel/vecmathlib-pocl/mad.cl | 576 + lib/kernel/vecmathlib-pocl/max.cl | 1038 + lib/kernel/vecmathlib-pocl/maxmag.cl | 576 + lib/kernel/vecmathlib-pocl/min.cl | 1038 + lib/kernel/vecmathlib-pocl/minmag.cl | 576 + lib/kernel/vecmathlib-pocl/mix.cl | 1038 + lib/kernel/vecmathlib-pocl/modf.cl | 1656 + lib/kernel/vecmathlib-pocl/nan.cl | 576 + 
lib/kernel/vecmathlib-pocl/native_cos.cl | 208 + lib/kernel/vecmathlib-pocl/native_divide.cl | 208 + lib/kernel/vecmathlib-pocl/native_exp.cl | 208 + lib/kernel/vecmathlib-pocl/native_exp10.cl | 208 + lib/kernel/vecmathlib-pocl/native_exp2.cl | 208 + lib/kernel/vecmathlib-pocl/native_log.cl | 208 + lib/kernel/vecmathlib-pocl/native_log10.cl | 208 + lib/kernel/vecmathlib-pocl/native_log2.cl | 208 + lib/kernel/vecmathlib-pocl/native_powr.cl | 208 + lib/kernel/vecmathlib-pocl/native_recip.cl | 208 + lib/kernel/vecmathlib-pocl/native_rsqrt.cl | 208 + lib/kernel/vecmathlib-pocl/native_sin.cl | 208 + lib/kernel/vecmathlib-pocl/native_sqrt.cl | 208 + lib/kernel/vecmathlib-pocl/native_tan.cl | 208 + lib/kernel/vecmathlib-pocl/normalize.cl | 75 + lib/kernel/vecmathlib-pocl/pocl-compat.h | 132 + lib/kernel/vecmathlib-pocl/pow.cc | 598 + lib/kernel/vecmathlib-pocl/pown.cl | 576 + lib/kernel/vecmathlib-pocl/powr.cl | 576 + lib/kernel/vecmathlib-pocl/radians.cl | 576 + lib/kernel/vecmathlib-pocl/remainder.cc | 598 + lib/kernel/vecmathlib-pocl/remquo.cl | 1980 + lib/kernel/vecmathlib-pocl/rint.cc | 547 + lib/kernel/vecmathlib-pocl/rootn.cl | 576 + lib/kernel/vecmathlib-pocl/round.cc | 547 + lib/kernel/vecmathlib-pocl/rsqrt.cc | 547 + lib/kernel/vecmathlib-pocl/sign.cl | 576 + lib/kernel/vecmathlib-pocl/signbit.cc | 547 + lib/kernel/vecmathlib-pocl/sin.cc | 547 + lib/kernel/vecmathlib-pocl/sincos.cl | 1656 + lib/kernel/vecmathlib-pocl/sinh.cc | 547 + lib/kernel/vecmathlib-pocl/sinpi.cl | 576 + lib/kernel/vecmathlib-pocl/smoothstep.cl | 1038 + lib/kernel/vecmathlib-pocl/sqrt.cc | 547 + lib/kernel/vecmathlib-pocl/step.cl | 1038 + lib/kernel/vecmathlib-pocl/tan.cc | 547 + lib/kernel/vecmathlib-pocl/tanh.cc | 547 + lib/kernel/vecmathlib-pocl/tanpi.cl | 576 + lib/kernel/vecmathlib-pocl/trunc.cc | 547 + lib/kernel/vecmathlib/LICENCE | 19 + lib/kernel/vecmathlib/README | 60 + lib/kernel/vecmathlib/floatbuiltins.h | 328 + lib/kernel/vecmathlib/floatprops.h | 319 + 
lib/kernel/vecmathlib/floattypes.h | 231 + lib/kernel/vecmathlib/mathfuncs.h | 22 + lib/kernel/vecmathlib/mathfuncs_asin.h | 212 + lib/kernel/vecmathlib/mathfuncs_asinh.h | 42 + lib/kernel/vecmathlib/mathfuncs_base.h | 134 + lib/kernel/vecmathlib/mathfuncs_convert.h | 203 + lib/kernel/vecmathlib/mathfuncs_exp.h | 162 + lib/kernel/vecmathlib/mathfuncs_fabs.h | 207 + lib/kernel/vecmathlib/mathfuncs_int.h | 135 + lib/kernel/vecmathlib/mathfuncs_log.h | 99 + lib/kernel/vecmathlib/mathfuncs_pow.h | 36 + lib/kernel/vecmathlib/mathfuncs_rcp.h | 115 + lib/kernel/vecmathlib/mathfuncs_sin.h | 236 + lib/kernel/vecmathlib/mathfuncs_sinh.h | 34 + lib/kernel/vecmathlib/mathfuncs_sqrt.h | 130 + lib/kernel/vecmathlib/vec_altivec_float4.h | 659 + lib/kernel/vecmathlib/vec_avx_double4.h | 770 + lib/kernel/vecmathlib/vec_avx_float8.h | 769 + lib/kernel/vecmathlib/vec_avx_fp16_16.h | 610 + lib/kernel/vecmathlib/vec_avx_fp8_32.h | 676 + lib/kernel/vecmathlib/vec_base.h | 666 + lib/kernel/vecmathlib/vec_builtin.h | 1461 + lib/kernel/vecmathlib/vec_mask.h | 78 + lib/kernel/vecmathlib/vec_mic_double8.h | 708 + lib/kernel/vecmathlib/vec_neon_float2.h | 621 + lib/kernel/vecmathlib/vec_neon_float4.h | 641 + lib/kernel/vecmathlib/vec_pseudo.h | 1679 + lib/kernel/vecmathlib/vec_qpx_double4.h | 795 + lib/kernel/vecmathlib/vec_sse_double1.h | 600 + lib/kernel/vecmathlib/vec_sse_double2.h | 747 + lib/kernel/vecmathlib/vec_sse_float1.h | 594 + lib/kernel/vecmathlib/vec_sse_float4.h | 776 + lib/kernel/vecmathlib/vec_test.h | 1482 + lib/kernel/vecmathlib/vec_vsx_double2.h | 691 + lib/kernel/vecmathlib/vecmathlib.h | 242 + lib/kernel/vload.cl | 123 + lib/kernel/vload_half.cl | 154 + lib/kernel/vstore.cl | 113 + lib/kernel/vstore_half.cl | 183 + lib/kernel/wait_group_events.cl | 31 + lib/kernel/write_image.cl | 97 + lib/llvmopencl/AllocasToEntry.cc | 77 + lib/llvmopencl/AllocasToEntry.h | 53 + lib/llvmopencl/AutomaticLocals.cc | 192 + lib/llvmopencl/Barrier.h | 126 + lib/llvmopencl/BarrierBlock.cc | 
55 + lib/llvmopencl/BarrierBlock.h | 44 + lib/llvmopencl/BarrierTailReplication.cc | 443 + lib/llvmopencl/BarrierTailReplication.h | 93 + lib/llvmopencl/BreakConstantGEPs.cpp | 330 + lib/llvmopencl/BreakConstantGEPs.h | 59 + lib/llvmopencl/CanonicalizeBarriers.cc | 234 + lib/llvmopencl/CanonicalizeBarriers.h | 56 + lib/llvmopencl/DebugHelpers.cc | 217 + lib/llvmopencl/DebugHelpers.h | 51 + lib/llvmopencl/Flatten.cc | 195 + lib/llvmopencl/GenerateHeader.cc | 317 + lib/llvmopencl/ImplicitConditionalBarriers.cc | 176 + lib/llvmopencl/ImplicitConditionalBarriers.h | 84 + lib/llvmopencl/ImplicitLoopBarriers.cc | 184 + lib/llvmopencl/ImplicitLoopBarriers.h | 44 + lib/llvmopencl/IsolateRegions.cc | 170 + lib/llvmopencl/IsolateRegions.h | 44 + lib/llvmopencl/Kernel.cc | 311 + lib/llvmopencl/Kernel.h | 58 + lib/llvmopencl/LLVMUtils.cc | 95 + lib/llvmopencl/LLVMUtils.h | 88 + lib/llvmopencl/LoopBarriers.cc | 213 + lib/llvmopencl/LoopBarriers.h | 47 + lib/llvmopencl/Makefile.am | 71 + lib/llvmopencl/Makefile.in | 811 + lib/llvmopencl/PHIsToAllocas.cc | 156 + lib/llvmopencl/PHIsToAllocas.h | 56 + lib/llvmopencl/ParallelRegion.cc | 832 + lib/llvmopencl/ParallelRegion.h | 135 + lib/llvmopencl/TargetAddressSpaces.cc | 285 + lib/llvmopencl/TargetAddressSpaces.h | 54 + lib/llvmopencl/VariableUniformityAnalysis.cc | 450 + lib/llvmopencl/VariableUniformityAnalysis.h | 79 + lib/llvmopencl/WIVectorize.cc | 3413 ++ lib/llvmopencl/WorkItemAliasAnalysis.cc | 159 + lib/llvmopencl/Workgroup.cc | 640 + lib/llvmopencl/Workgroup.h | 53 + lib/llvmopencl/WorkitemHandler.cc | 289 + lib/llvmopencl/WorkitemHandler.h | 83 + lib/llvmopencl/WorkitemHandlerChooser.cc | 113 + lib/llvmopencl/WorkitemHandlerChooser.h | 52 + lib/llvmopencl/WorkitemLoops.cc | 1012 + lib/llvmopencl/WorkitemLoops.h | 109 + lib/llvmopencl/WorkitemReplication.cc | 343 + lib/llvmopencl/WorkitemReplication.h | 70 + lib/llvmopencl/linker.cpp | 286 + lib/llvmopencl/linker.h | 20 + lib/poclu/Makefile.am | 32 + lib/poclu/Makefile.in 
| 709 + lib/poclu/bswap.c | 129 + lib/poclu/cl_half.c | 84 + lib/poclu/misc.c | 188 + m4/ax_boost_base.m4 | 258 + m4/libtool.m4 | 7983 +++ m4/ltoptions.m4 | 384 + m4/ltsugar.m4 | 123 + m4/ltversion.m4 | 23 + m4/lt~obsolete.m4 | 98 + ocl-vendors/pocl-tests.icd.in | 1 + pocl.icd.in | 1 + pocl.pc.in | 11 + scripts/Makefile.am | 65 + scripts/Makefile.in | 608 + scripts/pocl-standalone.in | 150 + tests/Makefile.am | 79 + tests/Makefile.in | 751 + tests/atlocal.in | 8 + tests/cell/Makefile.am | 25 + tests/cell/Makefile.in | 697 + tests/cell/hello/Makefile.am | 35 + tests/cell/hello/Makefile.in | 649 + tests/cell/hello/host.cpp | 213 + tests/kernel/Makefile.am | 41 + tests/kernel/Makefile.in | 732 + tests/kernel/image_query_funcs.c | 211 + tests/kernel/kernel.c | 189 + tests/kernel/sampler_address_clamp.c | 210 + tests/kernel/test_as_type.cl | 245 + tests/kernel/test_bitselect.cl | 1278 + tests/kernel/test_block.cl | 22 + tests/kernel/test_convert_sat_regression.cl | 11 + tests/kernel/test_convert_type_1.cl | 4478 ++ tests/kernel/test_convert_type_16.cl | 4478 ++ tests/kernel/test_convert_type_2.cl | 4478 ++ tests/kernel/test_convert_type_4.cl | 4478 ++ tests/kernel/test_convert_type_8.cl | 4478 ++ tests/kernel/test_fabs.cl | 328 + tests/kernel/test_fmin_fmax_fma.cl | 10 + tests/kernel/test_hadd.cl | 1721 + tests/kernel/test_image_query_funcs.cl | 19 + tests/kernel/test_min_max.cl | 23 + tests/kernel/test_printf.cl | 26 + tests/kernel/test_rotate.cl | 232 + tests/kernel/test_sampler_address_clamp.cl | 33 + tests/kernel/test_short16.cl | 105 + tests/kernel/test_shuffle.cc | 344 + tests/kernel/test_sizeof.cl | 9 + tests/package.m4 | 13 + tests/regression/Makefile.am | 46 + tests/regression/Makefile.in | 864 + ..._loop_variable_to_privvar_makes_it_local.c | 87 + ...oop_variable_to_privvar_makes_it_local_2.c | 82 + .../regression/test_barrier_before_return.cpp | 165 + .../test_barrier_between_for_loops.cpp | 202 + tests/regression/test_constant_array.cpp | 135 + 
tests/regression/test_early_return.cpp | 160 + .../test_for_with_var_iteration_count.cpp | 169 + .../test_fors_with_var_iteration_counts.cpp | 152 + .../test_id_dependent_computation.cpp | 184 + tests/regression/test_infinite_loop.cpp | 130 + tests/regression/test_locals.cpp | 159 + .../regression/test_loop_phi_replication.cpp | 168 + .../test_multi_level_loops_with_barriers.cpp | 178 + tests/regression/test_null_arg.cpp | 154 + tests/regression/test_setargs.cpp | 130 + .../test_simple_for_with_a_barrier.cpp | 174 + tests/regression/test_structs_as_args.cpp | 285 + .../regression/test_undominated_variable.cpp | 171 + tests/regression/test_vectors_as_args.cpp | 189 + tests/runtime/Makefile.am | 39 + tests/runtime/Makefile.in | 735 + tests/runtime/clGetKernelArgInfo.spir32_meta | Bin 0 -> 2528 bytes .../runtime/clGetKernelArgInfo.spir32_nometa | Bin 0 -> 2096 bytes tests/runtime/clGetKernelArgInfo.spir64_meta | Bin 0 -> 2596 bytes .../runtime/clGetKernelArgInfo.spir64_nometa | Bin 0 -> 2208 bytes tests/runtime/test_clBuildProgram.c | 94 + tests/runtime/test_clCreateKernel.c | 46 + tests/runtime/test_clCreateKernelsInProgram.c | 58 + .../runtime/test_clCreateKernelsInProgram.cl | 18 + .../runtime/test_clCreateProgramWithBinary.c | 166 + tests/runtime/test_clEnqueueNativeKernel.c | 186 + tests/runtime/test_clFinish.c | 253 + tests/runtime/test_clGetDeviceInfo.c | 56 + tests/runtime/test_clGetEventInfo.c | 82 + tests/runtime/test_clGetKernelArgInfo.c | 374 + .../runtime/test_clGetSupportedImageFormats.c | 50 + tests/runtime/test_clSetEventCallback.c | 153 + .../test_kernel_src_in_another_dir.h | 3 + tests/runtime/test_kernel_src_in_pwd.h | 10 + tests/runtime/test_version.c | 58 + tests/tce/Makefile.am | 30 + tests/tce/Makefile.in | 698 + tests/tce/fp16/Makefile.am | 36 + tests/tce/fp16/Makefile.in | 652 + tests/tce/fp16/host.cpp | 149 + tests/tce/tcemc/Makefile.am | 36 + tests/tce/tcemc/Makefile.in | 652 + tests/tce/tcemc/host.cpp | 223 + tests/tce/ttasim/Makefile.am | 
37 + tests/tce/ttasim/Makefile.in | 651 + tests/tce/ttasim/host.cpp | 224 + tests/testsuite | 13593 +++++ tests/testsuite-amd.at | 320 + tests/testsuite-amdsdk2_9.at | 586 + tests/testsuite-parboil.at | 113 + tests/testsuite-piglit.at | 19 + tests/testsuite-regression.at | 254 + tests/testsuite-rodinia.at | 106 + tests/testsuite-runtime.at | 69 + tests/testsuite-samples.at | 114 + tests/testsuite-tce.at | 43 + tests/testsuite-vexcl.at | 80 + tests/testsuite-viennacl.at | 199 + tests/testsuite-workgroup.at | 160 + tests/testsuite.at | 430 + tests/workgroup/Makefile.am | 38 + tests/workgroup/Makefile.in | 631 + tests/workgroup/basic_barriers.cl | 17 + tests/workgroup/basic_barriers_2_2_2_2.stdout | 48 + tests/workgroup/cond_barriers_1_2_1_1.stdout | 6 + tests/workgroup/conditional_barriers.cl | 14 + tests/workgroup/for_bug.cl | 57 + tests/workgroup/for_bug_1_2_1_1.stdout | 35 + tests/workgroup/forloops.cl | 10 + tests/workgroup/forloops_2_2_1_1.stdout | 4 + tests/workgroup/implicit_barriers.cl | 70 + .../implicit_barriers_1_2_1_1.stdout | 10 + tests/workgroup/loopbarriers.cl | 17 + tests/workgroup/loopbarriers_2_2_1_1.stdout | 24 + tests/workgroup/multilatch_bloop.cl | 26 + .../workgroup/multilatch_bloop_1_3_1_1.stdout | 18 + tests/workgroup/outerlooppar.cl | 44 + tests/workgroup/outerlooppar_2_2_1_1.stdout | 26 + tests/workgroup/print_all_ids.cl | 10 + tests/workgroup/print_all_ids_114114.txt | 9 + tests/workgroup/run_kernel.c | 149 + tests/workgroup/tricky_for.cl | 21 + tests/workgroup/tricky_for_1_2_1_1.stdout | 4 + tools/data/test_machine.adf | 1805 + tools/data/test_machine_fp16.adf | 2095 + tools/patches/khronos_cl.hpp.patch | 22 + 835 files changed, 381392 insertions(+) create mode 100644 CHANGES create mode 100644 COPYING create mode 100644 INSTALL create mode 100644 Makefile.am create mode 100644 Makefile.in create mode 100644 README create mode 100644 README.ARM create mode 100644 README.Cell create mode 100644 README.OSX create mode 100644 README.packaging 
create mode 100644 README.powerpc create mode 100644 TODO create mode 100644 acinclude.m4 create mode 100644 aclocal.m4 create mode 100644 config.h.in create mode 100755 config/ar-lib create mode 100755 config/compile create mode 100755 config/config.guess create mode 100755 config/config.sub create mode 100755 config/depcomp create mode 100755 config/install-sh create mode 100644 config/ltmain.sh create mode 100755 config/missing create mode 100755 config/xclang create mode 100755 configure create mode 100644 configure.ac create mode 100755 depcomp create mode 100644 doc/build-envs.txt create mode 100644 doc/sphinx/Makefile create mode 100644 doc/sphinx/source/conf.py create mode 100644 doc/sphinx/source/design.rst create mode 100644 doc/sphinx/source/development.rst create mode 100644 doc/sphinx/source/env_variables.rst create mode 100644 doc/sphinx/source/faq.rst create mode 100644 doc/sphinx/source/features.rst create mode 100644 doc/sphinx/source/host_library.rst create mode 100644 doc/sphinx/source/index.rst create mode 100644 doc/sphinx/source/kernel_compiler.rst create mode 100644 doc/sphinx/source/memory_management.rst create mode 100644 doc/sphinx/source/releasing.rst create mode 100644 doc/sphinx/source/using.rst create mode 100644 examples/AMD/AMDSDK.patch create mode 100644 examples/AMD/Makefile.am create mode 100644 examples/AMD/Makefile.in create mode 100644 examples/AMD/README create mode 100644 examples/AMDSDK2.9/AMDSDK2_9.patch create mode 100644 examples/AMDSDK2.9/Makefile.am create mode 100644 examples/AMDSDK2.9/Makefile.in create mode 100644 examples/AMDSDK2.9/README create mode 100644 examples/EinsteinToolkit/EinsteinToolkit.c create mode 100644 examples/EinsteinToolkit/ML_BSSN_CL_RHS1.cl create mode 100644 examples/EinsteinToolkit/ML_BSSN_CL_RHS2.cl create mode 100644 examples/EinsteinToolkit/Makefile.am create mode 100644 examples/EinsteinToolkit/Makefile.in create mode 100644 examples/Makefile.am create mode 100644 examples/Makefile.in 
create mode 100644 examples/Parboil/Makefile.am create mode 100644 examples/Parboil/Makefile.in create mode 100644 examples/Parboil/OpenCL_common.cpp.patch create mode 100644 examples/Parboil/README create mode 100644 examples/Parboil/benchmarks.patch create mode 100644 examples/Parboil/opencl.mk.patch create mode 100644 examples/Rodinia/Makefile.am create mode 100644 examples/Rodinia/Makefile.in create mode 100644 examples/Rodinia/README create mode 100644 examples/VexCL/Makefile.am create mode 100644 examples/VexCL/Makefile.in create mode 100644 examples/VexCL/README create mode 100644 examples/ViennaCL/Makefile.am create mode 100644 examples/ViennaCL/Makefile.in create mode 100644 examples/ViennaCL/README create mode 100644 examples/ViennaCL/bandwidth-reduction.stdout create mode 100644 examples/ViennaCL/blas3_solve_double-test-opencl.stdout create mode 100644 examples/ViennaCL/blas3_solve_float-test-opencl.stdout create mode 100644 examples/ViennaCL/custom-context.stdout create mode 100644 examples/ViennaCL/custom-kernels.stdout create mode 100644 examples/ViennaCL/external_linkage-opencl.stdout create mode 100644 examples/ViennaCL/fft.stdout create mode 100644 examples/ViennaCL/global_variables-test-opencl.stdout create mode 100644 examples/ViennaCL/iterators-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_col_double-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_col_float-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_col_int-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_row_double-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_row_float-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_row_int-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_vector-test-opencl.stdout create mode 100644 examples/ViennaCL/matrix_vector_int-test-opencl.stdout create mode 100644 examples/ViennaCL/nmf-test-opencl.stdout create mode 100644 
examples/ViennaCL/scalar-test-opencl.stdout create mode 100644 examples/ViennaCL/scheduler.stdout create mode 100644 examples/ViennaCL/structured-matrices-test-opencl.stdout create mode 100644 examples/ViennaCL/vector_double-test-opencl.stdout create mode 100644 examples/example1-spir32/Makefile.am create mode 100644 examples/example1-spir32/Makefile.in create mode 100644 examples/example1-spir32/example1.c create mode 100644 examples/example1-spir32/example1.cl create mode 100644 examples/example1-spir32/example1.spir create mode 100644 examples/example1-spir32/example1_exec.c create mode 100644 examples/example1-spir64/Makefile.am create mode 100644 examples/example1-spir64/Makefile.in create mode 100644 examples/example1-spir64/example1.c create mode 100644 examples/example1-spir64/example1.cl create mode 100644 examples/example1-spir64/example1.spir create mode 100644 examples/example1-spir64/example1_exec.c create mode 100644 examples/example1/Makefile.am create mode 100644 examples/example1/Makefile.in create mode 100644 examples/example1/example1.c create mode 100644 examples/example1/example1.cl create mode 100644 examples/example1/example1_exec.c create mode 100644 examples/example2/Makefile.am create mode 100644 examples/example2/Makefile.in create mode 100644 examples/example2/example2.c create mode 100644 examples/example2/example2.cl create mode 100644 examples/example2a/Makefile.am create mode 100644 examples/example2a/Makefile.in create mode 100644 examples/example2a/example2a.c create mode 100644 examples/example2a/example2a.cl create mode 100644 examples/opencl-book-samples/Makefile.am create mode 100644 examples/opencl-book-samples/Makefile.in create mode 100644 examples/piglit/Makefile.am create mode 100644 examples/piglit/Makefile.in create mode 100644 examples/piglit/README create mode 100644 examples/scalarwave/Makefile.am create mode 100644 examples/scalarwave/Makefile.in create mode 100644 examples/scalarwave/scalarwave.c create mode 100644 
examples/scalarwave/scalarwave.cl create mode 100644 examples/standalone/Makefile.am create mode 100644 examples/standalone/Makefile.in create mode 100644 examples/standalone/standalone.cl create mode 100644 examples/trig/Makefile.am create mode 100644 examples/trig/Makefile.in create mode 100644 examples/trig/trig.c create mode 100644 examples/trig/trig.cl create mode 100644 examples/trig/trig_exec.c create mode 100644 fix-include/CL/cl.h create mode 100644 fix-include/CL/cl.hpp create mode 100644 fix-include/CL/cl_gl.h create mode 100644 fix-include/CL/cl_platform.h create mode 100644 fix-include/OpenCL/cl.h create mode 100644 fix-include/OpenCL/cl.hpp create mode 100644 fix-include/OpenCL/cl_gl.h create mode 100644 fix-include/OpenCL/cl_platform.h create mode 100644 include/CL/Makefile.am create mode 100644 include/CL/Makefile.in create mode 100644 include/CL/cl.h create mode 100644 include/CL/cl.hpp.in create mode 100644 include/CL/cl_ext.h create mode 100644 include/CL/cl_gl.h create mode 100644 include/CL/cl_gl_ext.h create mode 100644 include/CL/cl_platform.h create mode 100644 include/CL/opencl.h create mode 100644 include/Makefile.am create mode 100644 include/Makefile.in create mode 100644 include/OpenCL/Makefile.am create mode 100644 include/OpenCL/Makefile.in create mode 100644 include/OpenCL/cl.h create mode 100644 include/OpenCL/cl.hpp create mode 100644 include/OpenCL/cl_ext.h create mode 100644 include/OpenCL/cl_gl.h create mode 100644 include/OpenCL/cl_gl_ext.h create mode 100644 include/OpenCL/cl_platform.h create mode 100644 include/OpenCL/opencl.h create mode 100644 include/_kernel.h create mode 100644 include/_kernel_c.h create mode 100644 include/pocl.h create mode 100644 include/pocl_device.h create mode 100644 include/pocl_features.h create mode 100644 include/pocl_tests.h create mode 100644 include/pocl_types.h create mode 100644 include/poclu.h create mode 100644 include/utlist.h create mode 100644 lib/CL/Makefile.am create mode 100644 
lib/CL/Makefile.in create mode 100644 lib/CL/clBuildProgram.c create mode 100644 lib/CL/clCreateBuffer.c create mode 100644 lib/CL/clCreateCommandQueue.c create mode 100644 lib/CL/clCreateContext.c create mode 100644 lib/CL/clCreateContextFromType.c create mode 100644 lib/CL/clCreateFromGLTexture2D.c create mode 100644 lib/CL/clCreateFromGLTexture3D.c create mode 100644 lib/CL/clCreateImage.c create mode 100644 lib/CL/clCreateImage2D.c create mode 100644 lib/CL/clCreateImage3D.c create mode 100644 lib/CL/clCreateKernel.c create mode 100644 lib/CL/clCreateKernelsInProgram.c create mode 100644 lib/CL/clCreateProgramWithBinary.c create mode 100644 lib/CL/clCreateProgramWithSource.c create mode 100644 lib/CL/clCreateSampler.c create mode 100644 lib/CL/clCreateSubBuffer.c create mode 100644 lib/CL/clCreateUserEvent.c create mode 100644 lib/CL/clEnqueueBarrier.c create mode 100644 lib/CL/clEnqueueCopyBuffer.c create mode 100644 lib/CL/clEnqueueCopyBufferRect.c create mode 100644 lib/CL/clEnqueueCopyBufferToImage.c create mode 100644 lib/CL/clEnqueueCopyImage.c create mode 100644 lib/CL/clEnqueueCopyImageToBuffer.c create mode 100644 lib/CL/clEnqueueFillImage.c create mode 100644 lib/CL/clEnqueueMapBuffer.c create mode 100644 lib/CL/clEnqueueMapBuffer.h create mode 100644 lib/CL/clEnqueueMapImage.c create mode 100644 lib/CL/clEnqueueMarker.c create mode 100644 lib/CL/clEnqueueMarkerWithWaitList.c create mode 100644 lib/CL/clEnqueueNDRangeKernel.c create mode 100644 lib/CL/clEnqueueNativeKernel.c create mode 100644 lib/CL/clEnqueueReadBuffer.c create mode 100644 lib/CL/clEnqueueReadBufferRect.c create mode 100644 lib/CL/clEnqueueReadImage.c create mode 100644 lib/CL/clEnqueueTask.c create mode 100644 lib/CL/clEnqueueUnmapMemObject.c create mode 100644 lib/CL/clEnqueueWaitForEvents.c create mode 100644 lib/CL/clEnqueueWriteBuffer.c create mode 100644 lib/CL/clEnqueueWriteBufferRect.c create mode 100644 lib/CL/clEnqueueWriteImage.c create mode 100644 lib/CL/clFinish.c create 
mode 100644 lib/CL/clFlush.c create mode 100644 lib/CL/clGetCommandQueueInfo.c create mode 100644 lib/CL/clGetContextInfo.c create mode 100644 lib/CL/clGetDeviceIDs.c create mode 100644 lib/CL/clGetDeviceInfo.c create mode 100644 lib/CL/clGetEventInfo.c create mode 100644 lib/CL/clGetEventProfilingInfo.c create mode 100644 lib/CL/clGetExtensionFunctionAddress.c create mode 100644 lib/CL/clGetImageInfo.c create mode 100644 lib/CL/clGetKernelArgInfo.c create mode 100644 lib/CL/clGetKernelInfo.c create mode 100644 lib/CL/clGetKernelWorkGroupInfo.c create mode 100644 lib/CL/clGetMemObjectInfo.c create mode 100644 lib/CL/clGetPlatformIDs.c create mode 100644 lib/CL/clGetPlatformInfo.c create mode 100644 lib/CL/clGetProgramBuildInfo.c create mode 100644 lib/CL/clGetProgramInfo.c create mode 100644 lib/CL/clGetSamplerInfo.c create mode 100644 lib/CL/clGetSupportedImageFormats.c create mode 100644 lib/CL/clIcdGetPlatformIDsKHR.c create mode 100644 lib/CL/clReleaseCommandQueue.c create mode 100644 lib/CL/clReleaseContext.c create mode 100644 lib/CL/clReleaseDevice.c create mode 100644 lib/CL/clReleaseEvent.c create mode 100644 lib/CL/clReleaseKernel.c create mode 100644 lib/CL/clReleaseMemObject.c create mode 100644 lib/CL/clReleaseProgram.c create mode 100644 lib/CL/clReleaseSampler.c create mode 100644 lib/CL/clRetainCommandQueue.c create mode 100644 lib/CL/clRetainContext.c create mode 100644 lib/CL/clRetainDevice.c create mode 100644 lib/CL/clRetainEvent.c create mode 100644 lib/CL/clRetainKernel.c create mode 100644 lib/CL/clRetainMemObject.c create mode 100644 lib/CL/clRetainProgram.c create mode 100644 lib/CL/clRetainSampler.c create mode 100644 lib/CL/clSetEventCallback.c create mode 100644 lib/CL/clSetKernelArg.c create mode 100644 lib/CL/clSetMemObjectDestructorCallback.c create mode 100644 lib/CL/clSetUserEventStatus.c create mode 100644 lib/CL/clUnloadCompiler.c create mode 100644 lib/CL/clWaitForEvents.c create mode 100644 lib/CL/devices/Makefile.am create mode 
100644 lib/CL/devices/Makefile.in create mode 100644 lib/CL/devices/basic/Makefile.am create mode 100644 lib/CL/devices/basic/Makefile.in create mode 100644 lib/CL/devices/basic/basic.c create mode 100644 lib/CL/devices/basic/basic.h create mode 100644 lib/CL/devices/bufalloc.c create mode 100644 lib/CL/devices/bufalloc.h create mode 100644 lib/CL/devices/cellspu/Makefile.am create mode 100644 lib/CL/devices/cellspu/Makefile.in create mode 100644 lib/CL/devices/cellspu/cellspu.c create mode 100644 lib/CL/devices/cellspu/cellspu.h create mode 100644 lib/CL/devices/common.c create mode 100644 lib/CL/devices/common.h create mode 100644 lib/CL/devices/cpuinfo.c create mode 100644 lib/CL/devices/cpuinfo.h create mode 100644 lib/CL/devices/dev_image.h create mode 100644 lib/CL/devices/devices.c create mode 100644 lib/CL/devices/devices.h create mode 100644 lib/CL/devices/prototypes.inc create mode 100644 lib/CL/devices/pthread/Makefile.am create mode 100644 lib/CL/devices/pthread/Makefile.in create mode 100644 lib/CL/devices/pthread/pocl-pthread.h create mode 100644 lib/CL/devices/pthread/pthread.c create mode 100644 lib/CL/devices/tce/Makefile.am create mode 100644 lib/CL/devices/tce/Makefile.in create mode 100644 lib/CL/devices/tce/tce_common.cc create mode 100644 lib/CL/devices/tce/tce_common.h create mode 100644 lib/CL/devices/tce/tta_device_main.c create mode 100644 lib/CL/devices/tce/ttasim/Makefile.am create mode 100644 lib/CL/devices/tce/ttasim/Makefile.in create mode 100644 lib/CL/devices/tce/ttasim/ttasim.cc create mode 100644 lib/CL/devices/tce/ttasim/ttasim.h create mode 100644 lib/CL/devices/topology/Makefile.am create mode 100644 lib/CL/devices/topology/Makefile.in create mode 100644 lib/CL/devices/topology/pocl_topology.c create mode 100644 lib/CL/devices/topology/pocl_topology.h create mode 100644 lib/CL/pocl_cl.h create mode 100644 lib/CL/pocl_icd.h create mode 100644 lib/CL/pocl_image_util.c create mode 100644 lib/CL/pocl_image_util.h create mode 100644 
lib/CL/pocl_intfn.h create mode 100644 lib/CL/pocl_llvm.h create mode 100644 lib/CL/pocl_llvm_api.cc create mode 100644 lib/CL/pocl_mem_management.c create mode 100644 lib/CL/pocl_mem_management.h create mode 100644 lib/CL/pocl_runtime_config.c create mode 100644 lib/CL/pocl_runtime_config.h create mode 100644 lib/CL/pocl_util.c create mode 100644 lib/CL/pocl_util.h create mode 100644 lib/Makefile.am create mode 100644 lib/Makefile.in create mode 100644 lib/kernel/Makefile.am create mode 100644 lib/kernel/Makefile.in create mode 100644 lib/kernel/abs.cl create mode 100644 lib/kernel/abs_diff.cl create mode 100644 lib/kernel/acos.cl create mode 100644 lib/kernel/acosh.cl create mode 100644 lib/kernel/acospi.cl create mode 100644 lib/kernel/add_sat.cl create mode 100644 lib/kernel/all.cl create mode 100644 lib/kernel/any.cl create mode 100644 lib/kernel/as_type.cl create mode 100644 lib/kernel/asin.cl create mode 100644 lib/kernel/asinh.cl create mode 100644 lib/kernel/asinpi.cl create mode 100644 lib/kernel/async_work_group_copy.cl create mode 100644 lib/kernel/atan.cl create mode 100644 lib/kernel/atan2.cl create mode 100644 lib/kernel/atan2pi.cl create mode 100644 lib/kernel/atanh.cl create mode 100644 lib/kernel/atanpi.cl create mode 100644 lib/kernel/atomics.cl create mode 100644 lib/kernel/barrier.ll create mode 100644 lib/kernel/bitselect.cl create mode 100644 lib/kernel/cbrt.cl create mode 100644 lib/kernel/ceil.cl create mode 100644 lib/kernel/cellspu/CMakeLists.txt create mode 100644 lib/kernel/cellspu/Makefile create mode 100644 lib/kernel/cellspu/Makefile.am create mode 100644 lib/kernel/cellspu/Makefile.in create mode 100644 lib/kernel/clamp.cl create mode 100644 lib/kernel/clamp_int.cl create mode 100644 lib/kernel/clz.cl create mode 100644 lib/kernel/convert_type.cl create mode 100644 lib/kernel/copysign.cl create mode 100644 lib/kernel/cos.cl create mode 100644 lib/kernel/cosh.cl create mode 100644 lib/kernel/cospi.cl create mode 100644 
lib/kernel/cross.cl create mode 100644 lib/kernel/degrees.cl create mode 100644 lib/kernel/distance.cl create mode 100644 lib/kernel/divide.cl create mode 100644 lib/kernel/dot.cl create mode 100644 lib/kernel/erf.cl create mode 100644 lib/kernel/erfc.cl create mode 100644 lib/kernel/exp.cl create mode 100644 lib/kernel/exp10.cl create mode 100644 lib/kernel/exp2.cl create mode 100644 lib/kernel/expm1.cl create mode 100644 lib/kernel/fabs.cl create mode 100644 lib/kernel/fast_distance.cl create mode 100644 lib/kernel/fast_length.cl create mode 100644 lib/kernel/fast_normalize.cl create mode 100644 lib/kernel/fdim.cl create mode 100644 lib/kernel/floor.cl create mode 100644 lib/kernel/fma.cl create mode 100644 lib/kernel/fmax.cl create mode 100644 lib/kernel/fmin.cl create mode 100644 lib/kernel/fmod.cl create mode 100644 lib/kernel/fract.cl create mode 100644 lib/kernel/get_global_id.c create mode 100644 lib/kernel/get_global_offset.c create mode 100644 lib/kernel/get_global_size.c create mode 100644 lib/kernel/get_group_id.c create mode 100644 lib/kernel/get_image_depth.cl create mode 100644 lib/kernel/get_image_height.cl create mode 100644 lib/kernel/get_image_width.cl create mode 100644 lib/kernel/get_local_id.c create mode 100644 lib/kernel/get_local_size.c create mode 100644 lib/kernel/get_num_groups.c create mode 100644 lib/kernel/get_work_dim.c create mode 100644 lib/kernel/hadd.cl create mode 100644 lib/kernel/host/Makefile.am create mode 100644 lib/kernel/host/Makefile.in create mode 100644 lib/kernel/hypot.cl create mode 100644 lib/kernel/ilogb.cl create mode 100644 lib/kernel/isequal.cl create mode 100644 lib/kernel/isfinite.cl create mode 100644 lib/kernel/isgreater.cl create mode 100644 lib/kernel/isgreaterequal.cl create mode 100644 lib/kernel/isinf.cl create mode 100644 lib/kernel/isless.cl create mode 100644 lib/kernel/islessequal.cl create mode 100644 lib/kernel/islessgreater.cl create mode 100644 lib/kernel/isnan.cl create mode 100644 
lib/kernel/isnormal.cl create mode 100644 lib/kernel/isnotequal.cl create mode 100644 lib/kernel/isordered.cl create mode 100644 lib/kernel/isunordered.cl create mode 100644 lib/kernel/ldexp.cl create mode 100644 lib/kernel/length.cl create mode 100644 lib/kernel/lgamma.cl create mode 100644 lib/kernel/log.cl create mode 100644 lib/kernel/log10.cl create mode 100644 lib/kernel/log1p.cl create mode 100644 lib/kernel/log2.cl create mode 100644 lib/kernel/logb.cl create mode 100644 lib/kernel/mad.cl create mode 100644 lib/kernel/mad24.cl create mode 100644 lib/kernel/mad_hi.cl create mode 100644 lib/kernel/mad_sat.cl create mode 100644 lib/kernel/max.cl create mode 100644 lib/kernel/max_i.cl create mode 100644 lib/kernel/maxmag.cl create mode 100644 lib/kernel/min.cl create mode 100644 lib/kernel/min_i.cl create mode 100644 lib/kernel/minmag.cl create mode 100644 lib/kernel/mix.cl create mode 100644 lib/kernel/mul24.cl create mode 100644 lib/kernel/mul_hi.cl create mode 100644 lib/kernel/nan.cl create mode 100644 lib/kernel/native_cos.cl create mode 100644 lib/kernel/native_log2.cl create mode 100644 lib/kernel/nextafter.cl create mode 100644 lib/kernel/normalize.cl create mode 100644 lib/kernel/pocl_image_rw_utils.h create mode 100644 lib/kernel/popcount.cl create mode 100644 lib/kernel/pow.cl create mode 100644 lib/kernel/pown.cl create mode 100644 lib/kernel/powr.cl create mode 100644 lib/kernel/printf.c create mode 100644 lib/kernel/printf_constant.c create mode 100644 lib/kernel/radians.cl create mode 100644 lib/kernel/read_image.cl create mode 100644 lib/kernel/recip.cl create mode 100644 lib/kernel/remainder.cl create mode 100644 lib/kernel/rhadd.cl create mode 100644 lib/kernel/rint.cl create mode 100644 lib/kernel/rootn.cl create mode 100644 lib/kernel/rotate.cl create mode 100644 lib/kernel/round.cl create mode 100644 lib/kernel/rsqrt.cl create mode 100644 lib/kernel/rules.mk create mode 100644 lib/kernel/select.cl create mode 100644 lib/kernel/shuffle.cl 
create mode 100644 lib/kernel/sign.cl create mode 100644 lib/kernel/signbit.cl create mode 100644 lib/kernel/sin.cl create mode 100644 lib/kernel/sincos.cl create mode 100644 lib/kernel/sinh.cl create mode 100644 lib/kernel/sinpi.cl create mode 100644 lib/kernel/smoothstep.cl create mode 100644 lib/kernel/sources-vml.mk create mode 100644 lib/kernel/sources.mk create mode 100644 lib/kernel/sqrt.cl create mode 100644 lib/kernel/step.cl create mode 100644 lib/kernel/sub_sat.cl create mode 100644 lib/kernel/tan.cl create mode 100644 lib/kernel/tanh.cl create mode 100644 lib/kernel/tanpi.cl create mode 100644 lib/kernel/tce/CMakeLists.txt create mode 100644 lib/kernel/tce/Makefile create mode 100644 lib/kernel/tce/Makefile.am create mode 100644 lib/kernel/tce/Makefile.in create mode 100644 lib/kernel/tce/get_image_width.cl.ll create mode 100644 lib/kernel/templates.h create mode 100644 lib/kernel/tgamma.cl create mode 100644 lib/kernel/trunc.cl create mode 100644 lib/kernel/upsample.cl create mode 100644 lib/kernel/vecmathlib-pocl/acos.cc create mode 100644 lib/kernel/vecmathlib-pocl/acosh.cc create mode 100644 lib/kernel/vecmathlib-pocl/acospi.cl create mode 100644 lib/kernel/vecmathlib-pocl/asin.cc create mode 100644 lib/kernel/vecmathlib-pocl/asinh.cc create mode 100644 lib/kernel/vecmathlib-pocl/asinpi.cl create mode 100644 lib/kernel/vecmathlib-pocl/atan.cc create mode 100644 lib/kernel/vecmathlib-pocl/atan2.cl create mode 100644 lib/kernel/vecmathlib-pocl/atan2pi.cl create mode 100644 lib/kernel/vecmathlib-pocl/atanh.cc create mode 100644 lib/kernel/vecmathlib-pocl/atanpi.cl create mode 100644 lib/kernel/vecmathlib-pocl/cbrt.cc create mode 100644 lib/kernel/vecmathlib-pocl/ceil.cc create mode 100644 lib/kernel/vecmathlib-pocl/clamp.cl create mode 100644 lib/kernel/vecmathlib-pocl/copysign.cc create mode 100644 lib/kernel/vecmathlib-pocl/cos.cc create mode 100644 lib/kernel/vecmathlib-pocl/cosh.cc create mode 100644 lib/kernel/vecmathlib-pocl/cospi.cl create mode 
100644 lib/kernel/vecmathlib-pocl/cross.cl create mode 100644 lib/kernel/vecmathlib-pocl/degrees.cl create mode 100644 lib/kernel/vecmathlib-pocl/distance.cl create mode 100644 lib/kernel/vecmathlib-pocl/dot.cl create mode 100644 lib/kernel/vecmathlib-pocl/exp.cc create mode 100644 lib/kernel/vecmathlib-pocl/exp10.cc create mode 100644 lib/kernel/vecmathlib-pocl/exp2.cc create mode 100644 lib/kernel/vecmathlib-pocl/expm1.cc create mode 100644 lib/kernel/vecmathlib-pocl/fabs.cc create mode 100644 lib/kernel/vecmathlib-pocl/fast_distance.cl create mode 100644 lib/kernel/vecmathlib-pocl/fast_length.cl create mode 100644 lib/kernel/vecmathlib-pocl/fast_normalize.cl create mode 100644 lib/kernel/vecmathlib-pocl/fdim.cc create mode 100644 lib/kernel/vecmathlib-pocl/floor.cc create mode 100644 lib/kernel/vecmathlib-pocl/fma.cc create mode 100644 lib/kernel/vecmathlib-pocl/fmax.cc create mode 100644 lib/kernel/vecmathlib-pocl/fmax.cl create mode 100644 lib/kernel/vecmathlib-pocl/fmin.cc create mode 100644 lib/kernel/vecmathlib-pocl/fmin.cl create mode 100644 lib/kernel/vecmathlib-pocl/fmod.cc create mode 100644 lib/kernel/vecmathlib-pocl/fract.cl create mode 100644 lib/kernel/vecmathlib-pocl/frexp.cl create mode 100755 lib/kernel/vecmathlib-pocl/generate-files.py create mode 100644 lib/kernel/vecmathlib-pocl/half_cos.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_divide.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_exp.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_exp10.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_exp2.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_log.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_log10.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_log2.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_powr.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_recip.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_rsqrt.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_sin.cl create mode 
100644 lib/kernel/vecmathlib-pocl/half_sqrt.cl create mode 100644 lib/kernel/vecmathlib-pocl/half_tan.cl create mode 100644 lib/kernel/vecmathlib-pocl/hypot.cc create mode 100644 lib/kernel/vecmathlib-pocl/ilogb.cl create mode 100644 lib/kernel/vecmathlib-pocl/ilogb_.cc create mode 100644 lib/kernel/vecmathlib-pocl/isequal.cl create mode 100644 lib/kernel/vecmathlib-pocl/isfinite.cc create mode 100644 lib/kernel/vecmathlib-pocl/isgreater.cl create mode 100644 lib/kernel/vecmathlib-pocl/isgreaterequal.cl create mode 100644 lib/kernel/vecmathlib-pocl/isinf.cc create mode 100644 lib/kernel/vecmathlib-pocl/isless.cl create mode 100644 lib/kernel/vecmathlib-pocl/islessequal.cl create mode 100644 lib/kernel/vecmathlib-pocl/islessgreater.cl create mode 100644 lib/kernel/vecmathlib-pocl/isnan.cc create mode 100644 lib/kernel/vecmathlib-pocl/isnormal.cc create mode 100644 lib/kernel/vecmathlib-pocl/isnotequal.cl create mode 100644 lib/kernel/vecmathlib-pocl/isordered.cl create mode 100644 lib/kernel/vecmathlib-pocl/isunordered.cl create mode 100644 lib/kernel/vecmathlib-pocl/kernel-vecmathlib.h create mode 100644 lib/kernel/vecmathlib-pocl/ldexp.cl create mode 100644 lib/kernel/vecmathlib-pocl/ldexp_.cc create mode 100644 lib/kernel/vecmathlib-pocl/length.cl create mode 100644 lib/kernel/vecmathlib-pocl/log.cc create mode 100644 lib/kernel/vecmathlib-pocl/log10.cc create mode 100644 lib/kernel/vecmathlib-pocl/log1p.cc create mode 100644 lib/kernel/vecmathlib-pocl/log2.cc create mode 100644 lib/kernel/vecmathlib-pocl/logb.cl create mode 100644 lib/kernel/vecmathlib-pocl/mad.cl create mode 100644 lib/kernel/vecmathlib-pocl/max.cl create mode 100644 lib/kernel/vecmathlib-pocl/maxmag.cl create mode 100644 lib/kernel/vecmathlib-pocl/min.cl create mode 100644 lib/kernel/vecmathlib-pocl/minmag.cl create mode 100644 lib/kernel/vecmathlib-pocl/mix.cl create mode 100644 lib/kernel/vecmathlib-pocl/modf.cl create mode 100644 lib/kernel/vecmathlib-pocl/nan.cl create mode 100644 
lib/kernel/vecmathlib-pocl/native_cos.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_divide.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_exp.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_exp10.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_exp2.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_log.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_log10.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_log2.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_powr.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_recip.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_rsqrt.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_sin.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_sqrt.cl create mode 100644 lib/kernel/vecmathlib-pocl/native_tan.cl create mode 100644 lib/kernel/vecmathlib-pocl/normalize.cl create mode 100644 lib/kernel/vecmathlib-pocl/pocl-compat.h create mode 100644 lib/kernel/vecmathlib-pocl/pow.cc create mode 100644 lib/kernel/vecmathlib-pocl/pown.cl create mode 100644 lib/kernel/vecmathlib-pocl/powr.cl create mode 100644 lib/kernel/vecmathlib-pocl/radians.cl create mode 100644 lib/kernel/vecmathlib-pocl/remainder.cc create mode 100644 lib/kernel/vecmathlib-pocl/remquo.cl create mode 100644 lib/kernel/vecmathlib-pocl/rint.cc create mode 100644 lib/kernel/vecmathlib-pocl/rootn.cl create mode 100644 lib/kernel/vecmathlib-pocl/round.cc create mode 100644 lib/kernel/vecmathlib-pocl/rsqrt.cc create mode 100644 lib/kernel/vecmathlib-pocl/sign.cl create mode 100644 lib/kernel/vecmathlib-pocl/signbit.cc create mode 100644 lib/kernel/vecmathlib-pocl/sin.cc create mode 100644 lib/kernel/vecmathlib-pocl/sincos.cl create mode 100644 lib/kernel/vecmathlib-pocl/sinh.cc create mode 100644 lib/kernel/vecmathlib-pocl/sinpi.cl create mode 100644 lib/kernel/vecmathlib-pocl/smoothstep.cl create mode 100644 lib/kernel/vecmathlib-pocl/sqrt.cc create mode 100644 
lib/kernel/vecmathlib-pocl/step.cl create mode 100644 lib/kernel/vecmathlib-pocl/tan.cc create mode 100644 lib/kernel/vecmathlib-pocl/tanh.cc create mode 100644 lib/kernel/vecmathlib-pocl/tanpi.cl create mode 100644 lib/kernel/vecmathlib-pocl/trunc.cc create mode 100644 lib/kernel/vecmathlib/LICENCE create mode 100644 lib/kernel/vecmathlib/README create mode 100644 lib/kernel/vecmathlib/floatbuiltins.h create mode 100644 lib/kernel/vecmathlib/floatprops.h create mode 100644 lib/kernel/vecmathlib/floattypes.h create mode 100644 lib/kernel/vecmathlib/mathfuncs.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_asin.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_asinh.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_base.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_convert.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_exp.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_fabs.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_int.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_log.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_pow.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_rcp.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_sin.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_sinh.h create mode 100644 lib/kernel/vecmathlib/mathfuncs_sqrt.h create mode 100644 lib/kernel/vecmathlib/vec_altivec_float4.h create mode 100644 lib/kernel/vecmathlib/vec_avx_double4.h create mode 100644 lib/kernel/vecmathlib/vec_avx_float8.h create mode 100644 lib/kernel/vecmathlib/vec_avx_fp16_16.h create mode 100644 lib/kernel/vecmathlib/vec_avx_fp8_32.h create mode 100644 lib/kernel/vecmathlib/vec_base.h create mode 100644 lib/kernel/vecmathlib/vec_builtin.h create mode 100644 lib/kernel/vecmathlib/vec_mask.h create mode 100644 lib/kernel/vecmathlib/vec_mic_double8.h create mode 100644 lib/kernel/vecmathlib/vec_neon_float2.h create mode 100644 lib/kernel/vecmathlib/vec_neon_float4.h create mode 100644 
lib/kernel/vecmathlib/vec_pseudo.h create mode 100644 lib/kernel/vecmathlib/vec_qpx_double4.h create mode 100644 lib/kernel/vecmathlib/vec_sse_double1.h create mode 100644 lib/kernel/vecmathlib/vec_sse_double2.h create mode 100644 lib/kernel/vecmathlib/vec_sse_float1.h create mode 100644 lib/kernel/vecmathlib/vec_sse_float4.h create mode 100644 lib/kernel/vecmathlib/vec_test.h create mode 100644 lib/kernel/vecmathlib/vec_vsx_double2.h create mode 100644 lib/kernel/vecmathlib/vecmathlib.h create mode 100644 lib/kernel/vload.cl create mode 100644 lib/kernel/vload_half.cl create mode 100644 lib/kernel/vstore.cl create mode 100644 lib/kernel/vstore_half.cl create mode 100644 lib/kernel/wait_group_events.cl create mode 100644 lib/kernel/write_image.cl create mode 100644 lib/llvmopencl/AllocasToEntry.cc create mode 100644 lib/llvmopencl/AllocasToEntry.h create mode 100644 lib/llvmopencl/AutomaticLocals.cc create mode 100644 lib/llvmopencl/Barrier.h create mode 100644 lib/llvmopencl/BarrierBlock.cc create mode 100644 lib/llvmopencl/BarrierBlock.h create mode 100644 lib/llvmopencl/BarrierTailReplication.cc create mode 100644 lib/llvmopencl/BarrierTailReplication.h create mode 100644 lib/llvmopencl/BreakConstantGEPs.cpp create mode 100644 lib/llvmopencl/BreakConstantGEPs.h create mode 100644 lib/llvmopencl/CanonicalizeBarriers.cc create mode 100644 lib/llvmopencl/CanonicalizeBarriers.h create mode 100644 lib/llvmopencl/DebugHelpers.cc create mode 100644 lib/llvmopencl/DebugHelpers.h create mode 100644 lib/llvmopencl/Flatten.cc create mode 100644 lib/llvmopencl/GenerateHeader.cc create mode 100644 lib/llvmopencl/ImplicitConditionalBarriers.cc create mode 100644 lib/llvmopencl/ImplicitConditionalBarriers.h create mode 100644 lib/llvmopencl/ImplicitLoopBarriers.cc create mode 100644 lib/llvmopencl/ImplicitLoopBarriers.h create mode 100644 lib/llvmopencl/IsolateRegions.cc create mode 100644 lib/llvmopencl/IsolateRegions.h create mode 100644 lib/llvmopencl/Kernel.cc create mode 
100644 lib/llvmopencl/Kernel.h create mode 100644 lib/llvmopencl/LLVMUtils.cc create mode 100644 lib/llvmopencl/LLVMUtils.h create mode 100644 lib/llvmopencl/LoopBarriers.cc create mode 100644 lib/llvmopencl/LoopBarriers.h create mode 100644 lib/llvmopencl/Makefile.am create mode 100644 lib/llvmopencl/Makefile.in create mode 100644 lib/llvmopencl/PHIsToAllocas.cc create mode 100644 lib/llvmopencl/PHIsToAllocas.h create mode 100644 lib/llvmopencl/ParallelRegion.cc create mode 100644 lib/llvmopencl/ParallelRegion.h create mode 100644 lib/llvmopencl/TargetAddressSpaces.cc create mode 100644 lib/llvmopencl/TargetAddressSpaces.h create mode 100644 lib/llvmopencl/VariableUniformityAnalysis.cc create mode 100644 lib/llvmopencl/VariableUniformityAnalysis.h create mode 100644 lib/llvmopencl/WIVectorize.cc create mode 100644 lib/llvmopencl/WorkItemAliasAnalysis.cc create mode 100644 lib/llvmopencl/Workgroup.cc create mode 100644 lib/llvmopencl/Workgroup.h create mode 100644 lib/llvmopencl/WorkitemHandler.cc create mode 100644 lib/llvmopencl/WorkitemHandler.h create mode 100644 lib/llvmopencl/WorkitemHandlerChooser.cc create mode 100644 lib/llvmopencl/WorkitemHandlerChooser.h create mode 100644 lib/llvmopencl/WorkitemLoops.cc create mode 100644 lib/llvmopencl/WorkitemLoops.h create mode 100644 lib/llvmopencl/WorkitemReplication.cc create mode 100644 lib/llvmopencl/WorkitemReplication.h create mode 100644 lib/llvmopencl/linker.cpp create mode 100644 lib/llvmopencl/linker.h create mode 100644 lib/poclu/Makefile.am create mode 100644 lib/poclu/Makefile.in create mode 100644 lib/poclu/bswap.c create mode 100644 lib/poclu/cl_half.c create mode 100644 lib/poclu/misc.c create mode 100644 m4/ax_boost_base.m4 create mode 100644 m4/libtool.m4 create mode 100644 m4/ltoptions.m4 create mode 100644 m4/ltsugar.m4 create mode 100644 m4/ltversion.m4 create mode 100644 m4/lt~obsolete.m4 create mode 100644 ocl-vendors/pocl-tests.icd.in create mode 100644 pocl.icd.in create mode 100644 
pocl.pc.in create mode 100644 scripts/Makefile.am create mode 100644 scripts/Makefile.in create mode 100644 scripts/pocl-standalone.in create mode 100644 tests/Makefile.am create mode 100644 tests/Makefile.in create mode 100644 tests/atlocal.in create mode 100644 tests/cell/Makefile.am create mode 100644 tests/cell/Makefile.in create mode 100644 tests/cell/hello/Makefile.am create mode 100644 tests/cell/hello/Makefile.in create mode 100644 tests/cell/hello/host.cpp create mode 100644 tests/kernel/Makefile.am create mode 100644 tests/kernel/Makefile.in create mode 100644 tests/kernel/image_query_funcs.c create mode 100644 tests/kernel/kernel.c create mode 100644 tests/kernel/sampler_address_clamp.c create mode 100644 tests/kernel/test_as_type.cl create mode 100644 tests/kernel/test_bitselect.cl create mode 100644 tests/kernel/test_block.cl create mode 100644 tests/kernel/test_convert_sat_regression.cl create mode 100644 tests/kernel/test_convert_type_1.cl create mode 100644 tests/kernel/test_convert_type_16.cl create mode 100644 tests/kernel/test_convert_type_2.cl create mode 100644 tests/kernel/test_convert_type_4.cl create mode 100644 tests/kernel/test_convert_type_8.cl create mode 100644 tests/kernel/test_fabs.cl create mode 100644 tests/kernel/test_fmin_fmax_fma.cl create mode 100644 tests/kernel/test_hadd.cl create mode 100644 tests/kernel/test_image_query_funcs.cl create mode 100644 tests/kernel/test_min_max.cl create mode 100644 tests/kernel/test_printf.cl create mode 100644 tests/kernel/test_rotate.cl create mode 100644 tests/kernel/test_sampler_address_clamp.cl create mode 100644 tests/kernel/test_short16.cl create mode 100644 tests/kernel/test_shuffle.cc create mode 100644 tests/kernel/test_sizeof.cl create mode 100644 tests/package.m4 create mode 100644 tests/regression/Makefile.am create mode 100644 tests/regression/Makefile.in create mode 100644 tests/regression/test_assign_loop_variable_to_privvar_makes_it_local.c create mode 100644 
tests/regression/test_assign_loop_variable_to_privvar_makes_it_local_2.c create mode 100644 tests/regression/test_barrier_before_return.cpp create mode 100644 tests/regression/test_barrier_between_for_loops.cpp create mode 100644 tests/regression/test_constant_array.cpp create mode 100644 tests/regression/test_early_return.cpp create mode 100644 tests/regression/test_for_with_var_iteration_count.cpp create mode 100644 tests/regression/test_fors_with_var_iteration_counts.cpp create mode 100644 tests/regression/test_id_dependent_computation.cpp create mode 100644 tests/regression/test_infinite_loop.cpp create mode 100644 tests/regression/test_locals.cpp create mode 100644 tests/regression/test_loop_phi_replication.cpp create mode 100644 tests/regression/test_multi_level_loops_with_barriers.cpp create mode 100644 tests/regression/test_null_arg.cpp create mode 100644 tests/regression/test_setargs.cpp create mode 100644 tests/regression/test_simple_for_with_a_barrier.cpp create mode 100644 tests/regression/test_structs_as_args.cpp create mode 100644 tests/regression/test_undominated_variable.cpp create mode 100644 tests/regression/test_vectors_as_args.cpp create mode 100644 tests/runtime/Makefile.am create mode 100644 tests/runtime/Makefile.in create mode 100644 tests/runtime/clGetKernelArgInfo.spir32_meta create mode 100644 tests/runtime/clGetKernelArgInfo.spir32_nometa create mode 100644 tests/runtime/clGetKernelArgInfo.spir64_meta create mode 100644 tests/runtime/clGetKernelArgInfo.spir64_nometa create mode 100644 tests/runtime/test_clBuildProgram.c create mode 100644 tests/runtime/test_clCreateKernel.c create mode 100644 tests/runtime/test_clCreateKernelsInProgram.c create mode 100644 tests/runtime/test_clCreateKernelsInProgram.cl create mode 100644 tests/runtime/test_clCreateProgramWithBinary.c create mode 100644 tests/runtime/test_clEnqueueNativeKernel.c create mode 100644 tests/runtime/test_clFinish.c create mode 100644 tests/runtime/test_clGetDeviceInfo.c create 
mode 100644 tests/runtime/test_clGetEventInfo.c create mode 100644 tests/runtime/test_clGetKernelArgInfo.c create mode 100644 tests/runtime/test_clGetSupportedImageFormats.c create mode 100644 tests/runtime/test_clSetEventCallback.c create mode 100644 tests/runtime/test_data/test_kernel_src_in_another_dir.h create mode 100644 tests/runtime/test_kernel_src_in_pwd.h create mode 100644 tests/runtime/test_version.c create mode 100644 tests/tce/Makefile.am create mode 100644 tests/tce/Makefile.in create mode 100644 tests/tce/fp16/Makefile.am create mode 100644 tests/tce/fp16/Makefile.in create mode 100644 tests/tce/fp16/host.cpp create mode 100644 tests/tce/tcemc/Makefile.am create mode 100644 tests/tce/tcemc/Makefile.in create mode 100644 tests/tce/tcemc/host.cpp create mode 100644 tests/tce/ttasim/Makefile.am create mode 100644 tests/tce/ttasim/Makefile.in create mode 100644 tests/tce/ttasim/host.cpp create mode 100755 tests/testsuite create mode 100644 tests/testsuite-amd.at create mode 100644 tests/testsuite-amdsdk2_9.at create mode 100644 tests/testsuite-parboil.at create mode 100644 tests/testsuite-piglit.at create mode 100644 tests/testsuite-regression.at create mode 100644 tests/testsuite-rodinia.at create mode 100644 tests/testsuite-runtime.at create mode 100644 tests/testsuite-samples.at create mode 100644 tests/testsuite-tce.at create mode 100644 tests/testsuite-vexcl.at create mode 100644 tests/testsuite-viennacl.at create mode 100644 tests/testsuite-workgroup.at create mode 100644 tests/testsuite.at create mode 100644 tests/workgroup/Makefile.am create mode 100644 tests/workgroup/Makefile.in create mode 100644 tests/workgroup/basic_barriers.cl create mode 100644 tests/workgroup/basic_barriers_2_2_2_2.stdout create mode 100644 tests/workgroup/cond_barriers_1_2_1_1.stdout create mode 100644 tests/workgroup/conditional_barriers.cl create mode 100644 tests/workgroup/for_bug.cl create mode 100644 tests/workgroup/for_bug_1_2_1_1.stdout create mode 100644 
tests/workgroup/forloops.cl create mode 100644 tests/workgroup/forloops_2_2_1_1.stdout create mode 100644 tests/workgroup/implicit_barriers.cl create mode 100644 tests/workgroup/implicit_barriers_1_2_1_1.stdout create mode 100644 tests/workgroup/loopbarriers.cl create mode 100644 tests/workgroup/loopbarriers_2_2_1_1.stdout create mode 100644 tests/workgroup/multilatch_bloop.cl create mode 100644 tests/workgroup/multilatch_bloop_1_3_1_1.stdout create mode 100644 tests/workgroup/outerlooppar.cl create mode 100644 tests/workgroup/outerlooppar_2_2_1_1.stdout create mode 100644 tests/workgroup/print_all_ids.cl create mode 100644 tests/workgroup/print_all_ids_114114.txt create mode 100644 tests/workgroup/run_kernel.c create mode 100644 tests/workgroup/tricky_for.cl create mode 100644 tests/workgroup/tricky_for_1_2_1_1.stdout create mode 100644 tools/data/test_machine.adf create mode 100644 tools/data/test_machine_fp16.adf create mode 100644 tools/patches/khronos_cl.hpp.patch diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..0ef427c --- /dev/null +++ b/CHANGES @@ -0,0 +1,324 @@ +0.10 August 2014 +================ + +This lists only the most interesting changes. Please +refer to the version control log for a full listing. + +Highlights +---------- +- Support for LLVM/Clang 3.5 +- Support for building using CMake (experimental with known issues). + +Bugfixes +-------- +- TCE: kernel building was broken when running pocl + from install location +- thread-safety (as required since OpenCL 1.1) improved + +Kernel compiler +--------------- +- Final code generation now done via LLVM API calls instead of + calling the llc binary. +- Sensible linking of functions from the monolithic kernel built-in + library. Major compilation speedup for smaller kernels. + +OpenCL C Builtin Function Implementations +----------------------------------------- +- Improved support for halfN functions. 
+- ilogb and ldexp available with vecmathlib + +OpenCL Runtime/Platform API support +----------------------------------- +- Implement clCreateKernelsInProgram() +- OpenCL-C shuffle() and shuffle2() implementation added +- Device probing modified to allow for device driver to detect device during + runtime. POCL_DEVICES still supported. +- Checks in clSetKernelArg() for argument validity +- Checks in clEnqueueNDRangeKernel() for arguments to be all set +- Implement clGetKernelArgInfo() +- clEnqueueCopyImage() + +Misc +---- +- ViennaCL testsuite updated to 1.5.1 + +0.9 January 2014 +================ + +This lists only the most interesting changes. Please +refer to the version control log for a full listing. + +Highlights +---------- +- Major improvements to the kernel compiler's vectorization + performance. Twofold speedups in some benchmarks +- Support for most of the piglit CL tests + +OpenCL Runtime/Platform API support +----------------------------------- +- clCreateImage2D() and clCreateImage3D() implementation moved to + clCreateImage() +- Image creation now uses clCreateBuffer() +- clBuildProgram: Propagate the supported -cl* compiler options to Clang's + OpenCL frontend. +- clFinish: works with commands with event wait lists. +- Preliminary support for OpenCL 2.0 blocks +- Added support for clEnqueueNativeKernel() + +Builtin Function Implementations (OpenCL 1.2 Section 6.12) +---------------------------------------------------------- +- Refactored read/write_image()-functions to support refactored device image + object. (Only functions used by SimpleImage test) +- Introduced new macro based implementation for read/write_image()-functions +- Added sampler implementation for CLK_ADDRESS_CLAMP and + CLK_ADDRESS_CLAMP_TO_EDGE (Only integer coords supported) +- Most of the printf() format strings now work.
Missing features: + - long on 32-bit architectures + +Performance Improvements +------------------------ +- Kernel compiler now tries to avoid replicating uniform variables, + this leads to less context data to be saved per work-item and cleaner + kernel bitcode for later optimizations +- Use a precompiled header for OpenCL C builtin declarations to speed up + the kernel compilation +- Kernel compiler vectorization optimizations: + - Inject implicit barriers both to loop starts and ends to + horizontally vectorize the inner loop. + - Reduce "peeling" by minimizing the conditional barrier region + by injecting implicit barrier close to the branch points for + conditional barrier cases. + - Breaking of vector datatypes for more efficient loop + vectorization. + - Support LLVM 3.4 parallel loop metadata. + +Misc +---- +- Explicitly specify the target architecture/CPU for the + kernel compiler. +- Kernel compiler frontend defaults to implementation using LLVM API + directly instead of the scripts. +- __OPENCL_VERSION__ defined to 120 +- poclu: helpers for converting between the C float and OpenCL cl_half + types +- clEnqueueNativeKernel implemented +- Static and cmake-builds of LLVM can now be used. + +Bugfixes +-------- +- Correct isequal, isnan, and similar routines + +0.8 August 2013 +================ + +This lists only the most interesting changes. Please +refer to the version control log for a full listing. + +Overall +------- + +- Added support for LLVM/Clang 3.3. +- Dropped support for LLVM/Clang v3.1. +- Removed the dependency on llvm-ld (which was copied to + pocl-llvm-ld to pocl tree). Now uses llvm-link instead. +- Project renamed to Portable Computing Language (pocl). +- Luxmark v2.0 now works. +- x86_64 can now use efficient math built-in function + implementations from the vecmathlib project to avoid libm + calls and to exploit the SIMD instructions more efficiently + in case of vector datatypes in the kernel.
+- Parallelize kernel inner loops "horizontally", if possible. + This converts possibly sequential inner kernel loops to parallel + loops by effectively performing "loop interchange" of the + work-item loop and the kernel's inner loop. +- Added VexCL tests to the test suite. All but one of them + work with pocl. + +Major bugfixes +-------------- +- Fixed passing NULL as a buffer argument to clSetKernelArg + (this time with a regression test added). +- Constant BitCast expressions broken to variables to avoid + crashing when copying a kernel with casts on automatic + local pointers. +- Fixes for i386/i686. Tested on Pentium4/Ubuntu 10.04 LTS. +- Lots of API error checking added (found by the Piglit testing suite). +- Fixed bug in select producing incorrect results when the third + conditional argument is an unsigned scalar or vector. +- Replaced deprecated SSE 4.1 assembly mnemonics in x86-64 min/max + kernel functions that have since been removed in more recent + versions of gas and llvm-as. +- SPIR/LLVM IR 'byval' attributes are now handled correctly on + kernel function arguments, allowing for structs and oversized + vectors to be passed in with value semantics. +- Fixed to work with the latest Khronos OpenCL headers for 1.2. + Some issues fixed with the new cl.hpp. +- The ICD dispatch table was too small which might have caused + "interesting" behavior when calling the later functions in + the table and not using ocl-icd as the dispatcher. +- Several kernel compiler bugs fixed. +- A multithreaded host application could free the same object + multiple times due to a race issue. + +Platform Layer implementations (OpenCL 1.2 Chapter 4) +----------------------------------------------------- +- Return correctly formatted CL_DEVICE_VERSION and + CL_DEVICE_OPENCL_C_VERSION. +- clGetDeviceInfo: Use the 'cpufreq' sys interface of Linux for + querying the CPU clock frequency, if available.
+ +The OpenCL Runtime (OpenCL 1.2 Chapter 5) +----------------------------------------- +- clGetEventInfo: Querying the command type, command queue, + and the reference count of the event. + +Builtin Function Implementations (OpenCL 1.2 Section 6.12) +---------------------------------------------------------- +- convert_type* builtins now generated with a Python script by + Victor Oliveira. +- length() fingerprint was assuming two arguments instead of one. +- The kernel bitcode library is now optimized when built in pocl. Speeds + up kernel optimization for cases which use the kernel functions + a lot. +- Fix mul_hi() implementation + +ICD +--- +- Fixed pocl tests to work when executed through the Khronos + supplied icd loader (needs a patch applied to the loader to be able to + override the .icd search path). + +Misc. +----- +- Fix to the helper script search logic: + Search from the BUILDDIR only if env POCL_BUILDING is defined. + Otherwise search from PKGDATADIR first, then from the PATH. +- Fixed memory leaks in clCreateContext* and clCreateKernel +- Ensured that stored arguments are adequately aligned in + clSetKernelArg and clEnqueueNDRangeKernel. + +0.7 January 2013 +================= + +This lists only the most interesting changes. Please +refer to the version control log for a full listing. + +Overall +------- +- Support for LLVM 3.2. +- Multi-WI work group functions can be now generated + using loops which are only partially unrolled. Reduces + code size explosion with large WGs in comparison to + the full replication method. +- PowerPC 64 support (tested on Cell/Debian Sid/PS3). +- PowerPC 32 support (tested on Cell/Debian Sid/PS3). +- ARM v7 support (on Linux) +- Beginning of Cell SPU support (very experimental!). +- Most of the AMD APP SDK OpenCL examples now work and have been + added to the pocl test suite. +- Most of the Parboil benchmark cases added to the test + suite.
+ +Kernel Compiler Passes +---------------------- +- Several miscompilations and compiler crashes fixed. +- Multiple bugs fixed from the work group vectorizer. +- Updated metadata format pocl uses to pass information + to vectorization and TCE backend to simplify debugging. +- Kernel pointer arguments are no longer always marked 'noalias' (restricted). + Doing this previously was a specs misunderstanding. +- ConstantGEPs to static variables generated from automated + locals caused problems. Now converting them to normal GEPs + using a pass from the SAFECode project. + +OpenCL Platform Layer implementations (OpenCL 1.2 Chapter 4) +------------------------------------------------------- +- clGetDeviceInfo now uses the hwloc lib for device property + queries. Many new queries implemented. +- clGetKernelInfo (initial implementation) +- clGetMemObjectInfo (initial implementation) +- clGetCommandQueueInfo (initial implementation) +- clReleaseDevice +- clRetainDevice +- Proper freeing of devices in clReleaseContext + +The OpenCL Runtime Implementations (OpenCL 1.2 Chapter 5) +--------------------------------------------------------- +- clBuildProgram: support for passing options to the compiler. +- clEnqueueMarker + +OpenCL C Builtin Function Implementations (OpenCL 1.2 Section 6.12) +------------------------------------------------------------------- +- Atomic Functions (6.12.11) +- get_global_offset() was not linked correctly + +Framework +--------- + +- Made it possible to override the .cl -> .bc build command + called by clBuildProgram per device. + +Device Drivers +-------------- + +- pthread/basic: + * extract CPU clock frequency from /proc/cpuinfo, if available + * return cl_khr_fp64 if doubles supported by the CPU +- ttasim: support for explicitly calling custom/special operations + through the vendor extensions API + +Misc. +----- + +- Fixes for MacOSX builds.
+- Fixed passing NULL as a buffer argument to clSetKernelArg +- Fixed a major bug when launching the same kernel multiple times: + the arguments were not copied to the command object. +- Fixed several issues with ICD, it is now considered stable to be + used by default. + +0.6 August 2012 +================= + +Kernel library +-------------- + +- Added initial optimized kernel library for X86_64/SSE. +- Preliminary support for ARM architectures on Linux + (briefly tested on MeeGo/Nokia N9). + +Pthread device driver +--------------------- + +- Multithreading at the work group granularity using pthreads. +- Tries to figure out the optimal maximum number of + threads for the system based on the available hardware + threads. Currently works only in Linux using the + /proc/cpuinfo interface. +- Region-based customized memory allocator for speeding up buffer + allocations. + +Kernel compiler +--------------- + +- Most of the tricky work group barrier cases (barriers inside + for-loops etc) now supported. +- Support for local variables, also automatic locals. +- Reuse previous compilation results, if available. +- Automatic vectorization of work groups (multiple work items + in parallel). + +Miscellaneous +------------- +- Installable Client Driver (icd) support. +- Event profiling support (incomplete, works only for kernel and + buffer read/write/map/unmap events). + +Known issues +------------ + +- Non-pointer struct kernel arguments fail due to varying ABIs + * https://bugs.launchpad.net/pocl/+bug/987905 +- Produces always "fully unrolled" chains of work items for + work groups causing code size explosion for large WGs.
+ diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..1a56b34 --- /dev/null +++ b/COPYING @@ -0,0 +1,19 @@ +Copyright (c) 2011 pocl developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..e264cfa --- /dev/null +++ b/INSTALL @@ -0,0 +1,429 @@ +Requirements +============ + +In order to build pocl, you need the following support libraries and +tools: + + * LLVM & Clang + * GNU make + * libtool dlopen wrapper files (e.g. libltdl3-dev in Debian) + * pthread (should be installed by default) + * hwloc v1.0 or newer (e.g. libhwloc-dev) + * pkg-config + * autotools or cmake + +IMPORTANT NOTE! 
In order to use LLVM with pocl you need to configure LLVM +with '--enable-shared' switch and some platforms also require that you +compile it with 'make REQUIRES_RTTI=1', as follows: + + ./configure --enable-shared --prefix=YOUR_INSTALLATION_PREFIX_HERE + make REQUIRES_RTTI=1 && make install + +Build using autotools +--------------------- + +After all the requirements are installed. The installation procedure +follows the usual autotools build+install. If you are using a development +source tree, you need to generate the autotool build files with + + "./autogen.sh". + +NOTE: automake 1.11 is known to work, + automake 1.96 might not work + +Build using cmake +----------------- +Cmake version 2.8.12 or higher is required. + +NOTE cmake buildsystem in pocl is not feature complete (compared to autotools); +in particular, if you want to use external testsuites, you have to build using +autotools. For more information on current status of cmake in pocl, +see https://github.com/pocl/pocl/wiki/CMake-status + +The build+install is the usual cmake way: + cd + mkdir build + cd build + cmake [-D